1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK45-64
5 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32
7 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32-EX
9 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
10 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64
11 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
12 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32
13 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX
15 // expected-no-diagnostics
19 // Check that the execution mode of every target region on the GPU is set to NonSPMD mode.
34 #pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)
35 for(int i
= 0; i
< n
; i
++) {
40 #pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)
41 for(int i
= 0; i
< n
; i
++) {
45 #pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)
46 for(int i
= 0; i
< 10; i
++) {
50 #pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)
51 for(int i
= 0; i
< M
; i
++) {
52 for(int j
= 0; j
< M
; j
++) {
54 c
[i
][j
] = i
+ j
* f
+ k
;
64 a
+= ftemplate
<int>(n
);
70 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
71 // CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
72 // CHECK45-64-NEXT: entry:
73 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
74 // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
75 // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
76 // CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
77 // CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
78 // CHECK45-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
79 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
80 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
81 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
82 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
83 // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
84 // CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
85 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
86 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
87 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
88 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
89 // CHECK45-64: user_code.entry:
90 // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
91 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
92 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
93 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
94 // CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
95 // CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
96 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
97 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
98 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
99 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
100 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
101 // CHECK45-64-NEXT: ret void
102 // CHECK45-64: worker.exit:
103 // CHECK45-64-NEXT: ret void
106 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
107 // CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
108 // CHECK45-64-NEXT: entry:
109 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
110 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
111 // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
112 // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
113 // CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
114 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
115 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
116 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
117 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
118 // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
119 // CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
120 // CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
121 // CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
122 // CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
123 // CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
124 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
125 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
126 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
127 // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
128 // CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
129 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
130 // CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
131 // CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
132 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
133 // CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
134 // CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
135 // CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
136 // CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
137 // CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
138 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
139 // CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
140 // CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
141 // CHECK45-64: omp.precond.then:
142 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
143 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
144 // CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
145 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
146 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
147 // CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
148 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
149 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
150 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
151 // CHECK45-64: omp.dispatch.cond:
152 // CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
153 // CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
154 // CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
155 // CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
156 // CHECK45-64: cond.true:
157 // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
158 // CHECK45-64-NEXT: br label [[COND_END:%.*]]
159 // CHECK45-64: cond.false:
160 // CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
161 // CHECK45-64-NEXT: br label [[COND_END]]
162 // CHECK45-64: cond.end:
163 // CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
164 // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
165 // CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
166 // CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
167 // CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
168 // CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
169 // CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
170 // CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
171 // CHECK45-64: omp.dispatch.body:
172 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
173 // CHECK45-64: omp.inner.for.cond:
174 // CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
175 // CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
176 // CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
177 // CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
178 // CHECK45-64: omp.inner.for.body:
179 // CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
180 // CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
181 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
182 // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
183 // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
184 // CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
185 // CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
186 // CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
187 // CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
188 // CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
189 // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
190 // CHECK45-64: omp.body.continue:
191 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
192 // CHECK45-64: omp.inner.for.inc:
193 // CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
194 // CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
195 // CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
196 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
197 // CHECK45-64: omp.inner.for.end:
198 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
199 // CHECK45-64: omp.dispatch.inc:
200 // CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
201 // CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
202 // CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
203 // CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
204 // CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
205 // CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
206 // CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
207 // CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
208 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
209 // CHECK45-64: omp.dispatch.end:
210 // CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
211 // CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
212 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
213 // CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
214 // CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
215 // CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
216 // CHECK45-64: .omp.final.then:
217 // CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
218 // CHECK45-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
219 // CHECK45-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
220 // CHECK45-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
221 // CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
222 // CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
223 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
224 // CHECK45-64: .omp.final.done:
225 // CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
226 // CHECK45-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
227 // CHECK45-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
228 // CHECK45-64: .omp.lastprivate.then:
229 // CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
230 // CHECK45-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
231 // CHECK45-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
232 // CHECK45-64: .omp.lastprivate.done:
233 // CHECK45-64-NEXT: br label [[OMP_PRECOND_END]]
234 // CHECK45-64: omp.precond.end:
235 // CHECK45-64-NEXT: ret void
238 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
239 // CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
240 // CHECK45-64-NEXT: entry:
241 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
242 // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
243 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
244 // CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
245 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
246 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
247 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
248 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
249 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
250 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
251 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
252 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
253 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
254 // CHECK45-64: user_code.entry:
255 // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
256 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
257 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
258 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
259 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
260 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
261 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
262 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
263 // CHECK45-64-NEXT: ret void
264 // CHECK45-64: worker.exit:
265 // CHECK45-64-NEXT: ret void
268 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
269 // CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
270 // CHECK45-64-NEXT: entry:
271 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
272 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
273 // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
274 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
275 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
276 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
277 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
278 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
279 // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
280 // CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
281 // CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
282 // CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
283 // CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
284 // CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
285 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
286 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
287 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
288 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
289 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
290 // CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
291 // CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
292 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
293 // CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
294 // CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
295 // CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
296 // CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
297 // CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
298 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
299 // CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
300 // CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
301 // CHECK45-64: omp.precond.then:
302 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
303 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
304 // CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
305 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
306 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
307 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
308 // CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
309 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
310 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
311 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
312 // CHECK45-64: omp.dispatch.cond:
313 // CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
314 // CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
315 // CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
316 // CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
317 // CHECK45-64: cond.true:
318 // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
319 // CHECK45-64-NEXT: br label [[COND_END:%.*]]
320 // CHECK45-64: cond.false:
321 // CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
322 // CHECK45-64-NEXT: br label [[COND_END]]
323 // CHECK45-64: cond.end:
324 // CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
325 // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
326 // CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
327 // CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
328 // CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
329 // CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
330 // CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
331 // CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
332 // CHECK45-64: omp.dispatch.body:
333 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
334 // CHECK45-64: omp.inner.for.cond:
335 // CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
336 // CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
337 // CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
338 // CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
339 // CHECK45-64: omp.inner.for.body:
340 // CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
341 // CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
342 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
343 // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
344 // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
345 // CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
346 // CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
347 // CHECK45-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
348 // CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
349 // CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
350 // CHECK45-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
351 // CHECK45-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
352 // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
353 // CHECK45-64: omp.body.continue:
354 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
355 // CHECK45-64: omp.inner.for.inc:
356 // CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
357 // CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
358 // CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
359 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
360 // CHECK45-64: omp.inner.for.end:
361 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
362 // CHECK45-64: omp.dispatch.inc:
363 // CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
364 // CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
365 // CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
366 // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
367 // CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
368 // CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
369 // CHECK45-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
370 // CHECK45-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
371 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
372 // CHECK45-64: omp.dispatch.end:
373 // CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
374 // CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
375 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
376 // CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
377 // CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
378 // CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
379 // CHECK45-64: .omp.final.then:
380 // CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
381 // CHECK45-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
382 // CHECK45-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
383 // CHECK45-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
384 // CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
385 // CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
386 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
387 // CHECK45-64: .omp.final.done:
388 // CHECK45-64-NEXT: br label [[OMP_PRECOND_END]]
389 // CHECK45-64: omp.precond.end:
390 // CHECK45-64-NEXT: ret void
393 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
394 // CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
395 // CHECK45-64-NEXT: entry:
396 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
397 // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
398 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
399 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
400 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
401 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
402 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
403 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
404 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
405 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
406 // CHECK45-64: user_code.entry:
407 // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
408 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
409 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
410 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
411 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
412 // CHECK45-64-NEXT: ret void
413 // CHECK45-64: worker.exit:
414 // CHECK45-64-NEXT: ret void
417 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
418 // CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
419 // CHECK45-64-NEXT: entry:
420 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
421 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
422 // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
423 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
424 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
425 // CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
426 // CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
427 // CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
428 // CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
429 // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
430 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
431 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
432 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
433 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
434 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
435 // CHECK45-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
436 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
437 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
438 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
439 // CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
440 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
441 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
442 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
443 // CHECK45-64: omp.dispatch.cond:
444 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
445 // CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
446 // CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
447 // CHECK45-64: cond.true:
448 // CHECK45-64-NEXT: br label [[COND_END:%.*]]
449 // CHECK45-64: cond.false:
450 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
451 // CHECK45-64-NEXT: br label [[COND_END]]
452 // CHECK45-64: cond.end:
453 // CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
454 // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
455 // CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
456 // CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
457 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
458 // CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
459 // CHECK45-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
460 // CHECK45-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
461 // CHECK45-64: omp.dispatch.body:
462 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
463 // CHECK45-64: omp.inner.for.cond:
464 // CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
465 // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
466 // CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
467 // CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
468 // CHECK45-64: omp.inner.for.body:
469 // CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
470 // CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
471 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
472 // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
473 // CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
474 // CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
475 // CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
476 // CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
477 // CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
478 // CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
479 // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
480 // CHECK45-64: omp.body.continue:
481 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
482 // CHECK45-64: omp.inner.for.inc:
483 // CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
484 // CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
485 // CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
486 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
487 // CHECK45-64: omp.inner.for.end:
488 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
489 // CHECK45-64: omp.dispatch.inc:
490 // CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
491 // CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
492 // CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
493 // CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
494 // CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
495 // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
496 // CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
497 // CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
498 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
499 // CHECK45-64: omp.dispatch.end:
500 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
501 // CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
502 // CHECK45-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
503 // CHECK45-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
504 // CHECK45-64: .omp.final.then:
505 // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
506 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
507 // CHECK45-64: .omp.final.done:
508 // CHECK45-64-NEXT: ret void
511 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
512 // CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] {
513 // CHECK45-64-NEXT: entry:
514 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
515 // CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
516 // CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
517 // CHECK45-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
518 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
519 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
520 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
521 // CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
522 // CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
523 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
524 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
525 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
526 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
527 // CHECK45-64: user_code.entry:
528 // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
529 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
530 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
531 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
532 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
533 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
534 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
535 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
536 // CHECK45-64-NEXT: ret void
537 // CHECK45-64: worker.exit:
538 // CHECK45-64-NEXT: ret void
541 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
542 // CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] {
543 // CHECK45-64-NEXT: entry:
544 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
545 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
546 // CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
547 // CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
548 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
549 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
550 // CHECK45-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
551 // CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
552 // CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
553 // CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
554 // CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
555 // CHECK45-64-NEXT: [[K:%.*]] = alloca i32, align 4
556 // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
557 // CHECK45-64-NEXT: [[J:%.*]] = alloca i32, align 4
558 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
559 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
560 // CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
561 // CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
562 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
563 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
564 // CHECK45-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
565 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
566 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
567 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
568 // CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
569 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
570 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
571 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
572 // CHECK45-64: omp.dispatch.cond:
573 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
574 // CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
575 // CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
576 // CHECK45-64: cond.true:
577 // CHECK45-64-NEXT: br label [[COND_END:%.*]]
578 // CHECK45-64: cond.false:
579 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
580 // CHECK45-64-NEXT: br label [[COND_END]]
581 // CHECK45-64: cond.end:
582 // CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
583 // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
584 // CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
585 // CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
586 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
587 // CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
588 // CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
589 // CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
590 // CHECK45-64: omp.dispatch.body:
591 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
592 // CHECK45-64: omp.inner.for.cond:
593 // CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
594 // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
595 // CHECK45-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
596 // CHECK45-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
597 // CHECK45-64: omp.inner.for.body:
598 // CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
599 // CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
600 // CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
601 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
602 // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
603 // CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
604 // CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
605 // CHECK45-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
606 // CHECK45-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
607 // CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
608 // CHECK45-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
609 // CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
610 // CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
611 // CHECK45-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
612 // CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
613 // CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
614 // CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
615 // CHECK45-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
616 // CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
617 // CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
618 // CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
619 // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
620 // CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
621 // CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
622 // CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
623 // CHECK45-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64
624 // CHECK45-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
625 // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP28]]
626 // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
627 // CHECK45-64: omp.body.continue:
628 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
629 // CHECK45-64: omp.inner.for.inc:
630 // CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
631 // CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1
632 // CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
633 // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
634 // CHECK45-64: omp.inner.for.end:
635 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
636 // CHECK45-64: omp.dispatch.inc:
637 // CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
638 // CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
639 // CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
640 // CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4
641 // CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
642 // CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
643 // CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
644 // CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4
645 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
646 // CHECK45-64: omp.dispatch.end:
647 // CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
648 // CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
649 // CHECK45-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
650 // CHECK45-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
651 // CHECK45-64: .omp.final.then:
652 // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
653 // CHECK45-64-NEXT: store i32 10, ptr [[J]], align 4
654 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
655 // CHECK45-64: .omp.final.done:
656 // CHECK45-64-NEXT: ret void
659 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
660 // CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
661 // CHECK45-32-NEXT: entry:
662 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
663 // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
664 // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
665 // CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
666 // CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
667 // CHECK45-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
668 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
669 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
670 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
671 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
672 // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
673 // CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
674 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
675 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
676 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
677 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
678 // CHECK45-32: user_code.entry:
679 // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
680 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
681 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
682 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
683 // CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
684 // CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
685 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
686 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
687 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
688 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
689 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
690 // CHECK45-32-NEXT: ret void
691 // CHECK45-32: worker.exit:
692 // CHECK45-32-NEXT: ret void
695 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
696 // CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
697 // CHECK45-32-NEXT: entry:
698 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
699 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
700 // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
701 // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
702 // CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
703 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
704 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
705 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
706 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
707 // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
708 // CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
709 // CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
710 // CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
711 // CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
712 // CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
713 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
714 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
715 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
716 // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
717 // CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
718 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
719 // CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
720 // CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
721 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
722 // CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
723 // CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
724 // CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
725 // CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
726 // CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
727 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
728 // CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
729 // CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
730 // CHECK45-32: omp.precond.then:
731 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
732 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
733 // CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
734 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
735 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
736 // CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
737 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
738 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
739 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
740 // CHECK45-32: omp.dispatch.cond:
741 // CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
742 // CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
743 // CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
744 // CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
745 // CHECK45-32: cond.true:
746 // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
747 // CHECK45-32-NEXT: br label [[COND_END:%.*]]
748 // CHECK45-32: cond.false:
749 // CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
750 // CHECK45-32-NEXT: br label [[COND_END]]
751 // CHECK45-32: cond.end:
752 // CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
753 // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
754 // CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
755 // CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
756 // CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
757 // CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
758 // CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
759 // CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
760 // CHECK45-32: omp.dispatch.body:
761 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
762 // CHECK45-32: omp.inner.for.cond:
763 // CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
764 // CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
765 // CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
766 // CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
767 // CHECK45-32: omp.inner.for.body:
768 // CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
769 // CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
770 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
771 // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
772 // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
773 // CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
774 // CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
775 // CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
776 // CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
777 // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
778 // CHECK45-32: omp.body.continue:
779 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
780 // CHECK45-32: omp.inner.for.inc:
781 // CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
782 // CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
783 // CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
784 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
785 // CHECK45-32: omp.inner.for.end:
786 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
787 // CHECK45-32: omp.dispatch.inc:
788 // CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
789 // CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
790 // CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
791 // CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
792 // CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
793 // CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
794 // CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
795 // CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
796 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
797 // CHECK45-32: omp.dispatch.end:
798 // CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
799 // CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
800 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
801 // CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
802 // CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
803 // CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
804 // CHECK45-32: .omp.final.then:
805 // CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
806 // CHECK45-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
807 // CHECK45-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
808 // CHECK45-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
809 // CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
810 // CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
811 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
812 // CHECK45-32: .omp.final.done:
813 // CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
814 // CHECK45-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
815 // CHECK45-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
816 // CHECK45-32: .omp.lastprivate.then:
817 // CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
818 // CHECK45-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
819 // CHECK45-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
820 // CHECK45-32: .omp.lastprivate.done:
821 // CHECK45-32-NEXT: br label [[OMP_PRECOND_END]]
822 // CHECK45-32: omp.precond.end:
823 // CHECK45-32-NEXT: ret void
826 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
827 // CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
828 // CHECK45-32-NEXT: entry:
829 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
830 // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
831 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
832 // CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
833 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
834 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
835 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
836 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
837 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
838 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
839 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
840 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
841 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
842 // CHECK45-32: user_code.entry:
843 // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
844 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
845 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
846 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
847 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
848 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
849 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
850 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
851 // CHECK45-32-NEXT: ret void
852 // CHECK45-32: worker.exit:
853 // CHECK45-32-NEXT: ret void
856 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
857 // CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
858 // CHECK45-32-NEXT: entry:
859 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
860 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
861 // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
862 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
863 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
864 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
865 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
866 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
867 // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
868 // CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
869 // CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
870 // CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
871 // CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
872 // CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
873 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
874 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
875 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
876 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
877 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
878 // CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
879 // CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
880 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
881 // CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
882 // CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
883 // CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
884 // CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
885 // CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
886 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
887 // CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
888 // CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
889 // CHECK45-32: omp.precond.then:
890 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
891 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
892 // CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
893 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
894 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
895 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
896 // CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
897 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
898 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
899 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
900 // CHECK45-32: omp.dispatch.cond:
901 // CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
902 // CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
903 // CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
904 // CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
905 // CHECK45-32: cond.true:
906 // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
907 // CHECK45-32-NEXT: br label [[COND_END:%.*]]
908 // CHECK45-32: cond.false:
909 // CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
910 // CHECK45-32-NEXT: br label [[COND_END]]
911 // CHECK45-32: cond.end:
912 // CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
913 // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
914 // CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
915 // CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
916 // CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
917 // CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
918 // CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
919 // CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
920 // CHECK45-32: omp.dispatch.body:
921 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
922 // CHECK45-32: omp.inner.for.cond:
923 // CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
924 // CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
925 // CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
926 // CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
927 // CHECK45-32: omp.inner.for.body:
928 // CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
929 // CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
930 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
931 // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
932 // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
933 // CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
934 // CHECK45-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
935 // CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
936 // CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
937 // CHECK45-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
938 // CHECK45-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
939 // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
940 // CHECK45-32: omp.body.continue:
941 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
942 // CHECK45-32: omp.inner.for.inc:
943 // CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
944 // CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
945 // CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
946 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
947 // CHECK45-32: omp.inner.for.end:
948 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
949 // CHECK45-32: omp.dispatch.inc:
950 // CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
951 // CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
952 // CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
953 // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
954 // CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
955 // CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
956 // CHECK45-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
957 // CHECK45-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
958 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
959 // CHECK45-32: omp.dispatch.end:
960 // CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
961 // CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
962 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
963 // CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
964 // CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
965 // CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
966 // CHECK45-32: .omp.final.then:
967 // CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
968 // CHECK45-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
969 // CHECK45-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
970 // CHECK45-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
971 // CHECK45-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
972 // CHECK45-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
973 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
974 // CHECK45-32: .omp.final.done:
975 // CHECK45-32-NEXT: br label [[OMP_PRECOND_END]]
976 // CHECK45-32: omp.precond.end:
977 // CHECK45-32-NEXT: ret void
980 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
981 // CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
982 // CHECK45-32-NEXT: entry:
983 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
984 // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
985 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
986 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
987 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
988 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
989 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
990 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
991 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
992 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
993 // CHECK45-32: user_code.entry:
994 // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
995 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
996 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
997 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
998 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
999 // CHECK45-32-NEXT: ret void
1000 // CHECK45-32: worker.exit:
1001 // CHECK45-32-NEXT: ret void
1004 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
1005 // CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1006 // CHECK45-32-NEXT: entry:
1007 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1008 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1009 // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1010 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1011 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
1012 // CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1013 // CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1014 // CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1015 // CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1016 // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
1017 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1018 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1019 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1020 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1021 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1022 // CHECK45-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
1023 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1024 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1025 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1026 // CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1027 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1028 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1029 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1030 // CHECK45-32: omp.dispatch.cond:
1031 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1032 // CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
1033 // CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1034 // CHECK45-32: cond.true:
1035 // CHECK45-32-NEXT: br label [[COND_END:%.*]]
1036 // CHECK45-32: cond.false:
1037 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1038 // CHECK45-32-NEXT: br label [[COND_END]]
1039 // CHECK45-32: cond.end:
1040 // CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1041 // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1042 // CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1043 // CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1044 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1045 // CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1046 // CHECK45-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1047 // CHECK45-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1048 // CHECK45-32: omp.dispatch.body:
1049 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1050 // CHECK45-32: omp.inner.for.cond:
1051 // CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
1052 // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1053 // CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1054 // CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1055 // CHECK45-32: omp.inner.for.body:
1056 // CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1057 // CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
1058 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1059 // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
1060 // CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
1061 // CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
1062 // CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
1063 // CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
1064 // CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
1065 // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1066 // CHECK45-32: omp.body.continue:
1067 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1068 // CHECK45-32: omp.inner.for.inc:
1069 // CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1070 // CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
1071 // CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1072 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
1073 // CHECK45-32: omp.inner.for.end:
1074 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1075 // CHECK45-32: omp.dispatch.inc:
1076 // CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1077 // CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1078 // CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
1079 // CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
1080 // CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1081 // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1082 // CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
1083 // CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
1084 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
1085 // CHECK45-32: omp.dispatch.end:
1086 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1087 // CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1088 // CHECK45-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
1089 // CHECK45-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1090 // CHECK45-32: .omp.final.then:
1091 // CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
1092 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
1093 // CHECK45-32: .omp.final.done:
1094 // CHECK45-32-NEXT: ret void
1097 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
1098 // CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
1099 // CHECK45-32-NEXT: entry:
1100 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1101 // CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1102 // CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1103 // CHECK45-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1104 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1105 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1106 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1107 // CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1108 // CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1109 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1110 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
1111 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1112 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1113 // CHECK45-32: user_code.entry:
1114 // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1115 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
1116 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
1117 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
1118 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1119 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1120 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
1121 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
1122 // CHECK45-32-NEXT: ret void
1123 // CHECK45-32: worker.exit:
1124 // CHECK45-32-NEXT: ret void
1127 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
1128 // CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
1129 // CHECK45-32-NEXT: entry:
1130 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1131 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1132 // CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1133 // CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1134 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1135 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
1136 // CHECK45-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1137 // CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1138 // CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1139 // CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1140 // CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1141 // CHECK45-32-NEXT: [[K:%.*]] = alloca i32, align 4
1142 // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
1143 // CHECK45-32-NEXT: [[J:%.*]] = alloca i32, align 4
1144 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1145 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1146 // CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1147 // CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1148 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1149 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1150 // CHECK45-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
1151 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1152 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1153 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1154 // CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1155 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1156 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1157 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1158 // CHECK45-32: omp.dispatch.cond:
1159 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1160 // CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
1161 // CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1162 // CHECK45-32: cond.true:
1163 // CHECK45-32-NEXT: br label [[COND_END:%.*]]
1164 // CHECK45-32: cond.false:
1165 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1166 // CHECK45-32-NEXT: br label [[COND_END]]
1167 // CHECK45-32: cond.end:
1168 // CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1169 // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1170 // CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1171 // CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1172 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1173 // CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1174 // CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1175 // CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1176 // CHECK45-32: omp.dispatch.body:
1177 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1178 // CHECK45-32: omp.inner.for.cond:
1179 // CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
1180 // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
1181 // CHECK45-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1182 // CHECK45-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1183 // CHECK45-32: omp.inner.for.body:
1184 // CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1185 // CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
1186 // CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
1187 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1188 // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1189 // CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1190 // CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1191 // CHECK45-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
1192 // CHECK45-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
1193 // CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
1194 // CHECK45-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
1195 // CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
1196 // CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1197 // CHECK45-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
1198 // CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1199 // CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1200 // CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
1201 // CHECK45-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
1202 // CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
1203 // CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
1204 // CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
1205 // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1206 // CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
1207 // CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1208 // CHECK45-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
1209 // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
1210 // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1211 // CHECK45-32: omp.body.continue:
1212 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1213 // CHECK45-32: omp.inner.for.inc:
1214 // CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1215 // CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
1216 // CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1217 // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
1218 // CHECK45-32: omp.inner.for.end:
1219 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1220 // CHECK45-32: omp.dispatch.inc:
1221 // CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1222 // CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1223 // CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1224 // CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
1225 // CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1226 // CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1227 // CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1228 // CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
1229 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
1230 // CHECK45-32: omp.dispatch.end:
1231 // CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1232 // CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1233 // CHECK45-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
1234 // CHECK45-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1235 // CHECK45-32: .omp.final.then:
1236 // CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
1237 // CHECK45-32-NEXT: store i32 10, ptr [[J]], align 4
1238 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
1239 // CHECK45-32: .omp.final.done:
1240 // CHECK45-32-NEXT: ret void
1243 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
1244 // CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
1245 // CHECK45-32-EX-NEXT: entry:
1246 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1247 // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1248 // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
1249 // CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1250 // CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1251 // CHECK45-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
1252 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1253 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1254 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1255 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1256 // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
1257 // CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
1258 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
1259 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
1260 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1261 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1262 // CHECK45-32-EX: user_code.entry:
1263 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
1264 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
1265 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
1266 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
1267 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
1268 // CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
1269 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
1270 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1271 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1272 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
1273 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
1274 // CHECK45-32-EX-NEXT: ret void
1275 // CHECK45-32-EX: worker.exit:
1276 // CHECK45-32-EX-NEXT: ret void
1279 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
1280 // CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1281 // CHECK45-32-EX-NEXT: entry:
1282 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1283 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1284 // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1285 // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
1286 // CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1287 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1288 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
1289 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1290 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1291 // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
1292 // CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1293 // CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1294 // CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1295 // CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1296 // CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
1297 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1298 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1299 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1300 // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
1301 // CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
1302 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
1303 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1304 // CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1305 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1306 // CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1307 // CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1308 // CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1309 // CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1310 // CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
1311 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1312 // CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1313 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1314 // CHECK45-32-EX: omp.precond.then:
1315 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1316 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1317 // CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
1318 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1319 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1320 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1321 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
1322 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
1323 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1324 // CHECK45-32-EX: omp.dispatch.cond:
1325 // CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1326 // CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1327 // CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1328 // CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1329 // CHECK45-32-EX: cond.true:
1330 // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1331 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
1332 // CHECK45-32-EX: cond.false:
1333 // CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1334 // CHECK45-32-EX-NEXT: br label [[COND_END]]
1335 // CHECK45-32-EX: cond.end:
1336 // CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1337 // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1338 // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1339 // CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
1340 // CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1341 // CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1342 // CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
1343 // CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1344 // CHECK45-32-EX: omp.dispatch.body:
1345 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1346 // CHECK45-32-EX: omp.inner.for.cond:
1347 // CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
1348 // CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1349 // CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1350 // CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1351 // CHECK45-32-EX: omp.inner.for.body:
1352 // CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1353 // CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
1354 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1355 // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1356 // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1357 // CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
1358 // CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
1359 // CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1360 // CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
1361 // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1362 // CHECK45-32-EX: omp.body.continue:
1363 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1364 // CHECK45-32-EX: omp.inner.for.inc:
1365 // CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1366 // CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
1367 // CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1368 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1369 // CHECK45-32-EX: omp.inner.for.end:
1370 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1371 // CHECK45-32-EX: omp.dispatch.inc:
1372 // CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1373 // CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1374 // CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1375 // CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
1376 // CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1377 // CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1378 // CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1379 // CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
1380 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
1381 // CHECK45-32-EX: omp.dispatch.end:
1382 // CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1383 // CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
1384 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
1385 // CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1386 // CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
1387 // CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1388 // CHECK45-32-EX: .omp.final.then:
1389 // CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1390 // CHECK45-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
1391 // CHECK45-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
1392 // CHECK45-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
1393 // CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
1394 // CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
1395 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
1396 // CHECK45-32-EX: .omp.final.done:
1397 // CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1398 // CHECK45-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
1399 // CHECK45-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1400 // CHECK45-32-EX: .omp.lastprivate.then:
1401 // CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
1402 // CHECK45-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
1403 // CHECK45-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1404 // CHECK45-32-EX: .omp.lastprivate.done:
1405 // CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]]
1406 // CHECK45-32-EX: omp.precond.end:
1407 // CHECK45-32-EX-NEXT: ret void
1410 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
1411 // CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
1412 // CHECK45-32-EX-NEXT: entry:
1413 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1414 // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1415 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
1416 // CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1417 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1418 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1419 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1420 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1421 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
1422 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
1423 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
1424 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1425 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1426 // CHECK45-32-EX: user_code.entry:
1427 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1428 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
1429 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
1430 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
1431 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1432 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1433 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
1434 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
1435 // CHECK45-32-EX-NEXT: ret void
1436 // CHECK45-32-EX: worker.exit:
1437 // CHECK45-32-EX-NEXT: ret void
1440 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
1441 // CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
1442 // CHECK45-32-EX-NEXT: entry:
1443 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1444 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1445 // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1446 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
1447 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1448 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
1449 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1450 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1451 // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
1452 // CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1453 // CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1454 // CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1455 // CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1456 // CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
1457 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1458 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1459 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1460 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
1461 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
1462 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1463 // CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1464 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1465 // CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1466 // CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1467 // CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1468 // CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1469 // CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
1470 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1471 // CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1472 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1473 // CHECK45-32-EX: omp.precond.then:
1474 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1475 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1476 // CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
1477 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1478 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1479 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1480 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1481 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
1482 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1483 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1484 // CHECK45-32-EX: omp.dispatch.cond:
1485 // CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1486 // CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1487 // CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1488 // CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1489 // CHECK45-32-EX: cond.true:
1490 // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1491 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
1492 // CHECK45-32-EX: cond.false:
1493 // CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1494 // CHECK45-32-EX-NEXT: br label [[COND_END]]
1495 // CHECK45-32-EX: cond.end:
1496 // CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1497 // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1498 // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1499 // CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
1500 // CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1501 // CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1502 // CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
1503 // CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1504 // CHECK45-32-EX: omp.dispatch.body:
1505 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1506 // CHECK45-32-EX: omp.inner.for.cond:
1507 // CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
1508 // CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
1509 // CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1510 // CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1511 // CHECK45-32-EX: omp.inner.for.body:
1512 // CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1513 // CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
1514 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1515 // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
1516 // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
1517 // CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
1518 // CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
1519 // CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
1520 // CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
1521 // CHECK45-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
1522 // CHECK45-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
1523 // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1524 // CHECK45-32-EX: omp.body.continue:
1525 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1526 // CHECK45-32-EX: omp.inner.for.inc:
1527 // CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1528 // CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
1529 // CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1530 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
1531 // CHECK45-32-EX: omp.inner.for.end:
1532 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1533 // CHECK45-32-EX: omp.dispatch.inc:
1534 // CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1535 // CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1536 // CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1537 // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
1538 // CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1539 // CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1540 // CHECK45-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1541 // CHECK45-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
1542 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
1543 // CHECK45-32-EX: omp.dispatch.end:
1544 // CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1545 // CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
1546 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
1547 // CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1548 // CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
1549 // CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1550 // CHECK45-32-EX: .omp.final.then:
1551 // CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1552 // CHECK45-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
1553 // CHECK45-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
1554 // CHECK45-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
1555 // CHECK45-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
1556 // CHECK45-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
1557 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
1558 // CHECK45-32-EX: .omp.final.done:
1559 // CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]]
1560 // CHECK45-32-EX: omp.precond.end:
1561 // CHECK45-32-EX-NEXT: ret void
1564 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
1565 // CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
1566 // CHECK45-32-EX-NEXT: entry:
1567 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1568 // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1569 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1570 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1571 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1572 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1573 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1574 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
1575 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1576 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1577 // CHECK45-32-EX: user_code.entry:
1578 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1579 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1580 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1581 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
1582 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
1583 // CHECK45-32-EX-NEXT: ret void
1584 // CHECK45-32-EX: worker.exit:
1585 // CHECK45-32-EX-NEXT: ret void
1588 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
1589 // CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1590 // CHECK45-32-EX-NEXT: entry:
1591 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1592 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1593 // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1594 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1595 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
1596 // CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1597 // CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1598 // CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1599 // CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1600 // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
1601 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1602 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1603 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1604 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1605 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1606 // CHECK45-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
1607 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1608 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1609 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1610 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1611 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1612 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1613 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1614 // CHECK45-32-EX: omp.dispatch.cond:
1615 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1616 // CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
1617 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1618 // CHECK45-32-EX: cond.true:
1619 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
1620 // CHECK45-32-EX: cond.false:
1621 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1622 // CHECK45-32-EX-NEXT: br label [[COND_END]]
1623 // CHECK45-32-EX: cond.end:
1624 // CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1625 // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1626 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1627 // CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1628 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1629 // CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1630 // CHECK45-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1631 // CHECK45-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1632 // CHECK45-32-EX: omp.dispatch.body:
1633 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1634 // CHECK45-32-EX: omp.inner.for.cond:
1635 // CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
1636 // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1637 // CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1638 // CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1639 // CHECK45-32-EX: omp.inner.for.body:
1640 // CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1641 // CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
1642 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1643 // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
1644 // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
1645 // CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
1646 // CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
1647 // CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
1648 // CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
1649 // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1650 // CHECK45-32-EX: omp.body.continue:
1651 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1652 // CHECK45-32-EX: omp.inner.for.inc:
1653 // CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1654 // CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
1655 // CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1656 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
1657 // CHECK45-32-EX: omp.inner.for.end:
1658 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1659 // CHECK45-32-EX: omp.dispatch.inc:
1660 // CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1661 // CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1662 // CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
1663 // CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
1664 // CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1665 // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1666 // CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
1667 // CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
1668 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
1669 // CHECK45-32-EX: omp.dispatch.end:
1670 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1671 // CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1672 // CHECK45-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
1673 // CHECK45-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1674 // CHECK45-32-EX: .omp.final.then:
1675 // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
1676 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
1677 // CHECK45-32-EX: .omp.final.done:
1678 // CHECK45-32-EX-NEXT: ret void
1681 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
1682 // CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
1683 // CHECK45-32-EX-NEXT: entry:
1684 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1685 // CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1686 // CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1687 // CHECK45-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1688 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1689 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1690 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1691 // CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1692 // CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1693 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1694 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
1695 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1696 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1697 // CHECK45-32-EX: user_code.entry:
1698 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1699 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
1700 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
1701 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
1702 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1703 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1704 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
1705 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
1706 // CHECK45-32-EX-NEXT: ret void
1707 // CHECK45-32-EX: worker.exit:
1708 // CHECK45-32-EX-NEXT: ret void
1711 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
1712 // CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
1713 // CHECK45-32-EX-NEXT: entry:
1714 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1715 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1716 // CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1717 // CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1718 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1719 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
1720 // CHECK45-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1721 // CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1722 // CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1723 // CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1724 // CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1725 // CHECK45-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4
1726 // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
1727 // CHECK45-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4
1728 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1729 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1730 // CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1731 // CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1732 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1733 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1734 // CHECK45-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
1735 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1736 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1737 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1738 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1739 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1740 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1741 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1742 // CHECK45-32-EX: omp.dispatch.cond:
1743 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1744 // CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
1745 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1746 // CHECK45-32-EX: cond.true:
1747 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
1748 // CHECK45-32-EX: cond.false:
1749 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1750 // CHECK45-32-EX-NEXT: br label [[COND_END]]
1751 // CHECK45-32-EX: cond.end:
1752 // CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1753 // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1754 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1755 // CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1756 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1757 // CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1758 // CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1759 // CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1760 // CHECK45-32-EX: omp.dispatch.body:
1761 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1762 // CHECK45-32-EX: omp.inner.for.cond:
1763 // CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
1764 // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
1765 // CHECK45-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1766 // CHECK45-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1767 // CHECK45-32-EX: omp.inner.for.body:
1768 // CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1769 // CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
1770 // CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
1771 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1772 // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1773 // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1774 // CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1775 // CHECK45-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
1776 // CHECK45-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
1777 // CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
1778 // CHECK45-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
1779 // CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
1780 // CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1781 // CHECK45-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
1782 // CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1783 // CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1784 // CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
1785 // CHECK45-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
1786 // CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
1787 // CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
1788 // CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
1789 // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
1790 // CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
1791 // CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
1792 // CHECK45-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
1793 // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
1794 // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1795 // CHECK45-32-EX: omp.body.continue:
1796 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1797 // CHECK45-32-EX: omp.inner.for.inc:
1798 // CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1799 // CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
1800 // CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1801 // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
1802 // CHECK45-32-EX: omp.inner.for.end:
1803 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1804 // CHECK45-32-EX: omp.dispatch.inc:
1805 // CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1806 // CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1807 // CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1808 // CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
1809 // CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1810 // CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1811 // CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1812 // CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
1813 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
1814 // CHECK45-32-EX: omp.dispatch.end:
1815 // CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1816 // CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1817 // CHECK45-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
1818 // CHECK45-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1819 // CHECK45-32-EX: .omp.final.then:
1820 // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
1821 // CHECK45-32-EX-NEXT: store i32 10, ptr [[J]], align 4
1822 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
1823 // CHECK45-32-EX: .omp.final.done:
1824 // CHECK45-32-EX-NEXT: ret void
1827 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
1828 // CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
1829 // CHECK-64-NEXT: entry:
1830 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
1831 // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1832 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1833 // CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
1834 // CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1835 // CHECK-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
1836 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1837 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1838 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
1839 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1840 // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1841 // CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
1842 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1843 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
1844 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1845 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1846 // CHECK-64: user_code.entry:
1847 // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
1848 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
1849 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
1850 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
1851 // CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
1852 // CHECK-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
1853 // CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
1854 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1855 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1856 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
1857 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
1858 // CHECK-64-NEXT: ret void
1859 // CHECK-64: worker.exit:
1860 // CHECK-64-NEXT: ret void
1863 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
1864 // CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1865 // CHECK-64-NEXT: entry:
1866 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1867 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1868 // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1869 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1870 // CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
1871 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1872 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
1873 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1874 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1875 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
1876 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1877 // CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1878 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1879 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1880 // CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
1881 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1882 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1883 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1884 // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1885 // CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
1886 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1887 // CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1888 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1889 // CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1890 // CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1891 // CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1892 // CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1893 // CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1894 // CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
1895 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1896 // CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1897 // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1898 // CHECK-64: omp.precond.then:
1899 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1900 // CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1901 // CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
1902 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1903 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1904 // CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1905 // CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
1906 // CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
1907 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1908 // CHECK-64: omp.dispatch.cond:
1909 // CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1910 // CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1911 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1912 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1913 // CHECK-64: cond.true:
1914 // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1915 // CHECK-64-NEXT: br label [[COND_END:%.*]]
1916 // CHECK-64: cond.false:
1917 // CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1918 // CHECK-64-NEXT: br label [[COND_END]]
1919 // CHECK-64: cond.end:
1920 // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1921 // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1922 // CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1923 // CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
1924 // CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1925 // CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1926 // CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
1927 // CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1928 // CHECK-64: omp.dispatch.body:
1929 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1930 // CHECK-64: omp.inner.for.cond:
1931 // CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
1932 // CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1933 // CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1934 // CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1935 // CHECK-64: omp.inner.for.body:
1936 // CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1937 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
1938 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1939 // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1940 // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1941 // CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
1942 // CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
1943 // CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
1944 // CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
1945 // CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
1946 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1947 // CHECK-64: omp.body.continue:
1948 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1949 // CHECK-64: omp.inner.for.inc:
1950 // CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1951 // CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
1952 // CHECK-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1953 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1954 // CHECK-64: omp.inner.for.end:
1955 // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1956 // CHECK-64: omp.dispatch.inc:
1957 // CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1958 // CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1959 // CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1960 // CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
1961 // CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1962 // CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1963 // CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1964 // CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
1965 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
1966 // CHECK-64: omp.dispatch.end:
1967 // CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1968 // CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
1969 // CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
1970 // CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1971 // CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
1972 // CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1973 // CHECK-64: .omp.final.then:
1974 // CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1975 // CHECK-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
1976 // CHECK-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
1977 // CHECK-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
1978 // CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
1979 // CHECK-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
1980 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
1981 // CHECK-64: .omp.final.done:
1982 // CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1983 // CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
1984 // CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1985 // CHECK-64: .omp.lastprivate.then:
1986 // CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
1987 // CHECK-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
1988 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1989 // CHECK-64: .omp.lastprivate.done:
1990 // CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
1991 // CHECK-64: omp.precond.end:
1992 // CHECK-64-NEXT: ret void
1995 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
1996 // CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
1997 // CHECK-64-NEXT: entry:
1998 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
1999 // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2000 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
2001 // CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2002 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2003 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2004 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
2005 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2006 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
2007 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
2008 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
2009 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2010 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2011 // CHECK-64: user_code.entry:
2012 // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2013 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
2014 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
2015 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
2016 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2017 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2018 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
2019 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
2020 // CHECK-64-NEXT: ret void
2021 // CHECK-64: worker.exit:
2022 // CHECK-64-NEXT: ret void
2025 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
2026 // CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
2027 // CHECK-64-NEXT: entry:
2028 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2029 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2030 // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2031 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
2032 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2033 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
2034 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2035 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
2036 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
2037 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2038 // CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2039 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2040 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2041 // CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
2042 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2043 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2044 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2045 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
2046 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
2047 // CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
2048 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
2049 // CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2050 // CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
2051 // CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2052 // CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
2053 // CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
2054 // CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
2055 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2056 // CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
2057 // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2058 // CHECK-64: omp.precond.then:
2059 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2060 // CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2061 // CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
2062 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2063 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2064 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2065 // CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2066 // CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
2067 // CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2068 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2069 // CHECK-64: omp.dispatch.cond:
2070 // CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2071 // CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2072 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
2073 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2074 // CHECK-64: cond.true:
2075 // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2076 // CHECK-64-NEXT: br label [[COND_END:%.*]]
2077 // CHECK-64: cond.false:
2078 // CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2079 // CHECK-64-NEXT: br label [[COND_END]]
2080 // CHECK-64: cond.end:
2081 // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
2082 // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2083 // CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2084 // CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
2085 // CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2086 // CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2087 // CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
2088 // CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2089 // CHECK-64: omp.dispatch.body:
2090 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2091 // CHECK-64: omp.inner.for.cond:
2092 // CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
2093 // CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
2094 // CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
2095 // CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2096 // CHECK-64: omp.inner.for.body:
2097 // CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2098 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
2099 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2100 // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
2101 // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
2102 // CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
2103 // CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
2104 // CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
2105 // CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
2106 // CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
2107 // CHECK-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
2108 // CHECK-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
2109 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2110 // CHECK-64: omp.body.continue:
2111 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2112 // CHECK-64: omp.inner.for.inc:
2113 // CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2114 // CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
2115 // CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2116 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2117 // CHECK-64: omp.inner.for.end:
2118 // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2119 // CHECK-64: omp.dispatch.inc:
2120 // CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2121 // CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2122 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2123 // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
2124 // CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2125 // CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2126 // CHECK-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2127 // CHECK-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
2128 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
2129 // CHECK-64: omp.dispatch.end:
2130 // CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2131 // CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
2132 // CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
2133 // CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2134 // CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
2135 // CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2136 // CHECK-64: .omp.final.then:
2137 // CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2138 // CHECK-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
2139 // CHECK-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
2140 // CHECK-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
2141 // CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
2142 // CHECK-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
2143 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
2144 // CHECK-64: .omp.final.done:
2145 // CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
2146 // CHECK-64: omp.precond.end:
2147 // CHECK-64-NEXT: ret void
2150 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
2151 // CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
2152 // CHECK-64-NEXT: entry:
2153 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
2154 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2155 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2156 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2157 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
2158 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2159 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2160 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
2161 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2162 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2163 // CHECK-64: user_code.entry:
2164 // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2165 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2166 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2167 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
2168 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
2169 // CHECK-64-NEXT: ret void
2170 // CHECK-64: worker.exit:
2171 // CHECK-64-NEXT: ret void
2174 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
2175 // CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
2176 // CHECK-64-NEXT: entry:
2177 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2178 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2179 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2180 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2181 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
2182 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2183 // CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2184 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2185 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2186 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
2187 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2188 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2189 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2190 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2191 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2192 // CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
2193 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2194 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2195 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2196 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2197 // CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
2198 // CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2199 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2200 // CHECK-64: omp.dispatch.cond:
2201 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2202 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
2203 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2204 // CHECK-64: cond.true:
2205 // CHECK-64-NEXT: br label [[COND_END:%.*]]
2206 // CHECK-64: cond.false:
2207 // CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2208 // CHECK-64-NEXT: br label [[COND_END]]
2209 // CHECK-64: cond.end:
2210 // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2211 // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2212 // CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2213 // CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
2214 // CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2215 // CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2216 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
2217 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2218 // CHECK-64: omp.dispatch.body:
2219 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2220 // CHECK-64: omp.inner.for.cond:
2221 // CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
2222 // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
2223 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2224 // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2225 // CHECK-64: omp.inner.for.body:
2226 // CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2227 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
2228 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2229 // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
2230 // CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
2231 // CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
2232 // CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
2233 // CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
2234 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
2235 // CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
2236 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2237 // CHECK-64: omp.body.continue:
2238 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2239 // CHECK-64: omp.inner.for.inc:
2240 // CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2241 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
2242 // CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2243 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
2244 // CHECK-64: omp.inner.for.end:
2245 // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2246 // CHECK-64: omp.dispatch.inc:
2247 // CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2248 // CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2249 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
2250 // CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
2251 // CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2252 // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2253 // CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
2254 // CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
2255 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
2256 // CHECK-64: omp.dispatch.end:
2257 // CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
2258 // CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2259 // CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
2260 // CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2261 // CHECK-64: .omp.final.then:
2262 // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
2263 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
2264 // CHECK-64: .omp.final.done:
2265 // CHECK-64-NEXT: ret void
2268 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
2269 // CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] {
2270 // CHECK-64-NEXT: entry:
2271 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
2272 // CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
2273 // CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
2274 // CHECK-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
2275 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2276 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2277 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
2278 // CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
2279 // CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
2280 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
2281 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
2282 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2283 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2284 // CHECK-64: user_code.entry:
2285 // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2286 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
2287 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
2288 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
2289 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2290 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2291 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
2292 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
2293 // CHECK-64-NEXT: ret void
2294 // CHECK-64: worker.exit:
2295 // CHECK-64-NEXT: ret void
2298 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
2299 // CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] {
2300 // CHECK-64-NEXT: entry:
2301 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2302 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2303 // CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
2304 // CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
2305 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2306 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
2307 // CHECK-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
2308 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2309 // CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2310 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2311 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2312 // CHECK-64-NEXT: [[K:%.*]] = alloca i32, align 4
2313 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
2314 // CHECK-64-NEXT: [[J:%.*]] = alloca i32, align 4
2315 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2316 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2317 // CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
2318 // CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
2319 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
2320 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2321 // CHECK-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
2322 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2323 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2324 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2325 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2326 // CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
2327 // CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2328 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2329 // CHECK-64: omp.dispatch.cond:
2330 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2331 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
2332 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2333 // CHECK-64: cond.true:
2334 // CHECK-64-NEXT: br label [[COND_END:%.*]]
2335 // CHECK-64: cond.false:
2336 // CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2337 // CHECK-64-NEXT: br label [[COND_END]]
2338 // CHECK-64: cond.end:
2339 // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2340 // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2341 // CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2342 // CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
2343 // CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2344 // CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2345 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
2346 // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2347 // CHECK-64: omp.dispatch.body:
2348 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2349 // CHECK-64: omp.inner.for.cond:
2350 // CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
2351 // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
2352 // CHECK-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2353 // CHECK-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2354 // CHECK-64: omp.inner.for.body:
2355 // CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2356 // CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
2357 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
2358 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2359 // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2360 // CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2361 // CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2362 // CHECK-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
2363 // CHECK-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
2364 // CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
2365 // CHECK-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
2366 // CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
2367 // CHECK-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2368 // CHECK-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
2369 // CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2370 // CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2371 // CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
2372 // CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
2373 // CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
2374 // CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
2375 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
2376 // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2377 // CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
2378 // CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
2379 // CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2380 // CHECK-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64
2381 // CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
2382 // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP28]]
2383 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2384 // CHECK-64: omp.body.continue:
2385 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2386 // CHECK-64: omp.inner.for.inc:
2387 // CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2388 // CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1
2389 // CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2390 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2391 // CHECK-64: omp.inner.for.end:
2392 // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2393 // CHECK-64: omp.dispatch.inc:
2394 // CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2395 // CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2396 // CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2397 // CHECK-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4
2398 // CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2399 // CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2400 // CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2401 // CHECK-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4
2402 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
2403 // CHECK-64: omp.dispatch.end:
2404 // CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
2405 // CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2406 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
2407 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2408 // CHECK-64: .omp.final.then:
2409 // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
2410 // CHECK-64-NEXT: store i32 10, ptr [[J]], align 4
2411 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
2412 // CHECK-64: .omp.final.done:
2413 // CHECK-64-NEXT: ret void
2416 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
2417 // CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
2418 // CHECK-32-NEXT: entry:
2419 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
2420 // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
2421 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
2422 // CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
2423 // CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
2424 // CHECK-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
2425 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2426 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2427 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
2428 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
2429 // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
2430 // CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
2431 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
2432 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
2433 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2434 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2435 // CHECK-32: user_code.entry:
2436 // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
2437 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
2438 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
2439 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
2440 // CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
2441 // CHECK-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
2442 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
2443 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2444 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2445 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
2446 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
2447 // CHECK-32-NEXT: ret void
2448 // CHECK-32: worker.exit:
2449 // CHECK-32-NEXT: ret void
2452 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
2453 // CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
2454 // CHECK-32-NEXT: entry:
2455 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
2456 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
2457 // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
2458 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
2459 // CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
2460 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2461 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
2462 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2463 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
2464 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
2465 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2466 // CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2467 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2468 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2469 // CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
2470 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
2471 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
2472 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
2473 // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
2474 // CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
2475 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
2476 // CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
2477 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
2478 // CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2479 // CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
2480 // CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2481 // CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
2482 // CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
2483 // CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
2484 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2485 // CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
2486 // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2487 // CHECK-32: omp.precond.then:
2488 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2489 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2490 // CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
2491 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2492 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2493 // CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2494 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
2495 // CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
2496 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2497 // CHECK-32: omp.dispatch.cond:
2498 // CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2499 // CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2500 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
2501 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2502 // CHECK-32: cond.true:
2503 // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2504 // CHECK-32-NEXT: br label [[COND_END:%.*]]
2505 // CHECK-32: cond.false:
2506 // CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2507 // CHECK-32-NEXT: br label [[COND_END]]
2508 // CHECK-32: cond.end:
2509 // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
2510 // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2511 // CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2512 // CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
2513 // CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2514 // CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2515 // CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
2516 // CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2517 // CHECK-32: omp.dispatch.body:
2518 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2519 // CHECK-32: omp.inner.for.cond:
2520 // CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
2521 // CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
2522 // CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
2523 // CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2524 // CHECK-32: omp.inner.for.body:
2525 // CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
2526 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
2527 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2528 // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
2529 // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
2530 // CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
2531 // CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
2532 // CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
2533 // CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
2534 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2535 // CHECK-32: omp.body.continue:
2536 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2537 // CHECK-32: omp.inner.for.inc:
2538 // CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
2539 // CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
2540 // CHECK-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
2541 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
2542 // CHECK-32: omp.inner.for.end:
2543 // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2544 // CHECK-32: omp.dispatch.inc:
2545 // CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2546 // CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2547 // CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2548 // CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
2549 // CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2550 // CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2551 // CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2552 // CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
2553 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
2554 // CHECK-32: omp.dispatch.end:
2555 // CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2556 // CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
2557 // CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
2558 // CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2559 // CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
2560 // CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2561 // CHECK-32: .omp.final.then:
2562 // CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2563 // CHECK-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
2564 // CHECK-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
2565 // CHECK-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
2566 // CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
2567 // CHECK-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
2568 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
2569 // CHECK-32: .omp.final.done:
2570 // CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2571 // CHECK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
2572 // CHECK-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
2573 // CHECK-32: .omp.lastprivate.then:
2574 // CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
2575 // CHECK-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
2576 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
2577 // CHECK-32: .omp.lastprivate.done:
2578 // CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
2579 // CHECK-32: omp.precond.end:
2580 // CHECK-32-NEXT: ret void
2583 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
2584 // CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
2585 // CHECK-32-NEXT: entry:
2586 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
2587 // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
2588 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
2589 // CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
2590 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2591 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2592 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
2593 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
2594 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
2595 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
2596 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
2597 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2598 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2599 // CHECK-32: user_code.entry:
2600 // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2601 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
2602 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
2603 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
2604 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2605 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2606 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
2607 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
2608 // CHECK-32-NEXT: ret void
2609 // CHECK-32: worker.exit:
2610 // CHECK-32-NEXT: ret void
2613 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
2614 // CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
2615 // CHECK-32-NEXT: entry:
2616 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
2617 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
2618 // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
2619 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
2620 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2621 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
2622 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2623 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
2624 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
2625 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2626 // CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2627 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2628 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2629 // CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
2630 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
2631 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
2632 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
2633 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
2634 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
2635 // CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
2636 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
2637 // CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2638 // CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
2639 // CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2640 // CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
2641 // CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
2642 // CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
2643 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2644 // CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
2645 // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2646 // CHECK-32: omp.precond.then:
2647 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2648 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2649 // CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
2650 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2651 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2652 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2653 // CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2654 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
2655 // CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2656 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2657 // CHECK-32: omp.dispatch.cond:
2658 // CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2659 // CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2660 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
2661 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2662 // CHECK-32: cond.true:
2663 // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
2664 // CHECK-32-NEXT: br label [[COND_END:%.*]]
2665 // CHECK-32: cond.false:
2666 // CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2667 // CHECK-32-NEXT: br label [[COND_END]]
2668 // CHECK-32: cond.end:
2669 // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
2670 // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2671 // CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2672 // CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
2673 // CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2674 // CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2675 // CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
2676 // CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2677 // CHECK-32: omp.dispatch.body:
2678 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2679 // CHECK-32: omp.inner.for.cond:
2680 // CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
2681 // CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
2682 // CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
2683 // CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2684 // CHECK-32: omp.inner.for.body:
2685 // CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2686 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
2687 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2688 // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
2689 // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
2690 // CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
2691 // CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
2692 // CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
2693 // CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
2694 // CHECK-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
2695 // CHECK-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
2696 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2697 // CHECK-32: omp.body.continue:
2698 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2699 // CHECK-32: omp.inner.for.inc:
2700 // CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2701 // CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
2702 // CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
2703 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2704 // CHECK-32: omp.inner.for.end:
2705 // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2706 // CHECK-32: omp.dispatch.inc:
2707 // CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2708 // CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2709 // CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2710 // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
2711 // CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2712 // CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2713 // CHECK-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2714 // CHECK-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
2715 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
2716 // CHECK-32: omp.dispatch.end:
2717 // CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2718 // CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
2719 // CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
2720 // CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2721 // CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
2722 // CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2723 // CHECK-32: .omp.final.then:
2724 // CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2725 // CHECK-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
2726 // CHECK-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
2727 // CHECK-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
2728 // CHECK-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
2729 // CHECK-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
2730 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
2731 // CHECK-32: .omp.final.done:
2732 // CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
2733 // CHECK-32: omp.precond.end:
2734 // CHECK-32-NEXT: ret void
2737 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
2738 // CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
2739 // CHECK-32-NEXT: entry:
2740 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
2741 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
2742 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2743 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2744 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
2745 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
2746 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
2747 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
2748 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2749 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2750 // CHECK-32: user_code.entry:
2751 // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2752 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2753 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2754 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
2755 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
2756 // CHECK-32-NEXT: ret void
2757 // CHECK-32: worker.exit:
2758 // CHECK-32-NEXT: ret void
2761 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
2762 // CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
2763 // CHECK-32-NEXT: entry:
2764 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
2765 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
2766 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
2767 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2768 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
2769 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2770 // CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2771 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2772 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2773 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
2774 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
2775 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
2776 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
2777 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
2778 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2779 // CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
2780 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2781 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2782 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2783 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2784 // CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
2785 // CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2786 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2787 // CHECK-32: omp.dispatch.cond:
2788 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2789 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
2790 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2791 // CHECK-32: cond.true:
2792 // CHECK-32-NEXT: br label [[COND_END:%.*]]
2793 // CHECK-32: cond.false:
2794 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2795 // CHECK-32-NEXT: br label [[COND_END]]
2796 // CHECK-32: cond.end:
2797 // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2798 // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2799 // CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2800 // CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
2801 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2802 // CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2803 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
2804 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2805 // CHECK-32: omp.dispatch.body:
2806 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2807 // CHECK-32: omp.inner.for.cond:
2808 // CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
2809 // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
2810 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2811 // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2812 // CHECK-32: omp.inner.for.body:
2813 // CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2814 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
2815 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2816 // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
2817 // CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
2818 // CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
2819 // CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
2820 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
2821 // CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
2822 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2823 // CHECK-32: omp.body.continue:
2824 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2825 // CHECK-32: omp.inner.for.inc:
2826 // CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2827 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
2828 // CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
2829 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
2830 // CHECK-32: omp.inner.for.end:
2831 // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2832 // CHECK-32: omp.dispatch.inc:
2833 // CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2834 // CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2835 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
2836 // CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
2837 // CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2838 // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2839 // CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
2840 // CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
2841 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
2842 // CHECK-32: omp.dispatch.end:
2843 // CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
2844 // CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2845 // CHECK-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
2846 // CHECK-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2847 // CHECK-32: .omp.final.then:
2848 // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
2849 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
2850 // CHECK-32: .omp.final.done:
2851 // CHECK-32-NEXT: ret void
2854 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
2855 // CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
2856 // CHECK-32-NEXT: entry:
2857 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
2858 // CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
2859 // CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
2860 // CHECK-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
2861 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2862 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2863 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
2864 // CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
2865 // CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
2866 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
2867 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
2868 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
2869 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2870 // CHECK-32: user_code.entry:
2871 // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
2872 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
2873 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
2874 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
2875 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
2876 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
2877 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
2878 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
2879 // CHECK-32-NEXT: ret void
2880 // CHECK-32: worker.exit:
2881 // CHECK-32-NEXT: ret void
2884 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
2885 // CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
2886 // CHECK-32-NEXT: entry:
2887 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
2888 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
2889 // CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
2890 // CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
2891 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2892 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
2893 // CHECK-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
2894 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2895 // CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2896 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2897 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2898 // CHECK-32-NEXT: [[K:%.*]] = alloca i32, align 4
2899 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
2900 // CHECK-32-NEXT: [[J:%.*]] = alloca i32, align 4
2901 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
2902 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
2903 // CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
2904 // CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
2905 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
2906 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2907 // CHECK-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
2908 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2909 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2910 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2911 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2912 // CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
2913 // CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2914 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
2915 // CHECK-32: omp.dispatch.cond:
2916 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2917 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
2918 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2919 // CHECK-32: cond.true:
2920 // CHECK-32-NEXT: br label [[COND_END:%.*]]
2921 // CHECK-32: cond.false:
2922 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2923 // CHECK-32-NEXT: br label [[COND_END]]
2924 // CHECK-32: cond.end:
2925 // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2926 // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2927 // CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2928 // CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
2929 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2930 // CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2931 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
2932 // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
2933 // CHECK-32: omp.dispatch.body:
2934 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2935 // CHECK-32: omp.inner.for.cond:
2936 // CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
2937 // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
2938 // CHECK-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2939 // CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2940 // CHECK-32: omp.inner.for.body:
2941 // CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2942 // CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
2943 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
2944 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2945 // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2946 // CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2947 // CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2948 // CHECK-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
2949 // CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
2950 // CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
2951 // CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
2952 // CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
2953 // CHECK-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2954 // CHECK-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
2955 // CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2956 // CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2957 // CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
2958 // CHECK-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
2959 // CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
2960 // CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
2961 // CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
2962 // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
2963 // CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
2964 // CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
2965 // CHECK-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
2966 // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
2967 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2968 // CHECK-32: omp.body.continue:
2969 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2970 // CHECK-32: omp.inner.for.inc:
2971 // CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2972 // CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
2973 // CHECK-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
2974 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2975 // CHECK-32: omp.inner.for.end:
2976 // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
2977 // CHECK-32: omp.dispatch.inc:
2978 // CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2979 // CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2980 // CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2981 // CHECK-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
2982 // CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2983 // CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2984 // CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2985 // CHECK-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
2986 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
2987 // CHECK-32: omp.dispatch.end:
2988 // CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
2989 // CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2990 // CHECK-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
2991 // CHECK-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2992 // CHECK-32: .omp.final.then:
2993 // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
2994 // CHECK-32-NEXT: store i32 10, ptr [[J]], align 4
2995 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
2996 // CHECK-32: .omp.final.done:
2997 // CHECK-32-NEXT: ret void
3000 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
3001 // CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
3002 // CHECK-32-EX-NEXT: entry:
3003 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
3004 // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
3005 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
3006 // CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
3007 // CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
3008 // CHECK-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
3009 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3010 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3011 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
3012 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
3013 // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
3014 // CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
3015 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
3016 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
3017 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
3018 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3019 // CHECK-32-EX: user_code.entry:
3020 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
3021 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
3022 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
3023 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
3024 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
3025 // CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
3026 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
3027 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
3028 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
3029 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
3030 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
3031 // CHECK-32-EX-NEXT: ret void
3032 // CHECK-32-EX: worker.exit:
3033 // CHECK-32-EX-NEXT: ret void
3036 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
3037 // CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
3038 // CHECK-32-EX-NEXT: entry:
3039 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
3040 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
3041 // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
3042 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
3043 // CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
3044 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
3045 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
3046 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3047 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
3048 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
3049 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
3050 // CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
3051 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
3052 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3053 // CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
3054 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
3055 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
3056 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
3057 // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
3058 // CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
3059 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
3060 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
3061 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
3062 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3063 // CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
3064 // CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3065 // CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
3066 // CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
3067 // CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
3068 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3069 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
3070 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
3071 // CHECK-32-EX: omp.precond.then:
3072 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
3073 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3074 // CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
3075 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
3076 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3077 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3078 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
3079 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
3080 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
3081 // CHECK-32-EX: omp.dispatch.cond:
3082 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3083 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3084 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
3085 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3086 // CHECK-32-EX: cond.true:
3087 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3088 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
3089 // CHECK-32-EX: cond.false:
3090 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3091 // CHECK-32-EX-NEXT: br label [[COND_END]]
3092 // CHECK-32-EX: cond.end:
3093 // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
3094 // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
3095 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3096 // CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
3097 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
3098 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3099 // CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
3100 // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
3101 // CHECK-32-EX: omp.dispatch.body:
3102 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3103 // CHECK-32-EX: omp.inner.for.cond:
3104 // CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
3105 // CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
3106 // CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
3107 // CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3108 // CHECK-32-EX: omp.inner.for.body:
3109 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
3110 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
3111 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
3112 // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
3113 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
3114 // CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
3115 // CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
3116 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
3117 // CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
3118 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3119 // CHECK-32-EX: omp.body.continue:
3120 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3121 // CHECK-32-EX: omp.inner.for.inc:
3122 // CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
3123 // CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
3124 // CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
3125 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
3126 // CHECK-32-EX: omp.inner.for.end:
3127 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
3128 // CHECK-32-EX: omp.dispatch.inc:
3129 // CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3130 // CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3131 // CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
3132 // CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
3133 // CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3134 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3135 // CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
3136 // CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
3137 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
3138 // CHECK-32-EX: omp.dispatch.end:
3139 // CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3140 // CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
3141 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
3142 // CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
3143 // CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
3144 // CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
3145 // CHECK-32-EX: .omp.final.then:
3146 // CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3147 // CHECK-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
3148 // CHECK-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
3149 // CHECK-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
3150 // CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
3151 // CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
3152 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
3153 // CHECK-32-EX: .omp.final.done:
3154 // CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
3155 // CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
3156 // CHECK-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
3157 // CHECK-32-EX: .omp.lastprivate.then:
3158 // CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
3159 // CHECK-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
3160 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
3161 // CHECK-32-EX: .omp.lastprivate.done:
3162 // CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]]
3163 // CHECK-32-EX: omp.precond.end:
3164 // CHECK-32-EX-NEXT: ret void
3167 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
3168 // CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
3169 // CHECK-32-EX-NEXT: entry:
3170 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
3171 // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
3172 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
3173 // CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
3174 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3175 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3176 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
3177 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
3178 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
3179 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
3180 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
3181 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
3182 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3183 // CHECK-32-EX: user_code.entry:
3184 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
3185 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
3186 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
3187 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
3188 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
3189 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
3190 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
3191 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
3192 // CHECK-32-EX-NEXT: ret void
3193 // CHECK-32-EX: worker.exit:
3194 // CHECK-32-EX-NEXT: ret void
3197 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
3198 // CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
3199 // CHECK-32-EX-NEXT: entry:
3200 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
3201 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
3202 // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
3203 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
3204 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
3205 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
3206 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3207 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
3208 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
3209 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
3210 // CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
3211 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
3212 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3213 // CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
3214 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
3215 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
3216 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
3217 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
3218 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
3219 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
3220 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
3221 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3222 // CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
3223 // CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3224 // CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
3225 // CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
3226 // CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
3227 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3228 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
3229 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
3230 // CHECK-32-EX: omp.precond.then:
3231 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
3232 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3233 // CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
3234 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
3235 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3236 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3237 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3238 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
3239 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
3240 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
3241 // CHECK-32-EX: omp.dispatch.cond:
3242 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3243 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3244 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
3245 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3246 // CHECK-32-EX: cond.true:
3247 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
3248 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
3249 // CHECK-32-EX: cond.false:
3250 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3251 // CHECK-32-EX-NEXT: br label [[COND_END]]
3252 // CHECK-32-EX: cond.end:
3253 // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
3254 // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
3255 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3256 // CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
3257 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
3258 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3259 // CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
3260 // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
3261 // CHECK-32-EX: omp.dispatch.body:
3262 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3263 // CHECK-32-EX: omp.inner.for.cond:
3264 // CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
3265 // CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
3266 // CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
3267 // CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3268 // CHECK-32-EX: omp.inner.for.body:
3269 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
3270 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
3271 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
3272 // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
3273 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
3274 // CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
3275 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
3276 // CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
3277 // CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
3278 // CHECK-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
3279 // CHECK-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
3280 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3281 // CHECK-32-EX: omp.body.continue:
3282 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3283 // CHECK-32-EX: omp.inner.for.inc:
3284 // CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
3285 // CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
3286 // CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
3287 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
3288 // CHECK-32-EX: omp.inner.for.end:
3289 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
3290 // CHECK-32-EX: omp.dispatch.inc:
3291 // CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3292 // CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3293 // CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
3294 // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
3295 // CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3296 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3297 // CHECK-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
3298 // CHECK-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
3299 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
3300 // CHECK-32-EX: omp.dispatch.end:
3301 // CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3302 // CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
3303 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
3304 // CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
3305 // CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
3306 // CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
3307 // CHECK-32-EX: .omp.final.then:
3308 // CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3309 // CHECK-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
3310 // CHECK-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
3311 // CHECK-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
3312 // CHECK-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
3313 // CHECK-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
3314 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
3315 // CHECK-32-EX: .omp.final.done:
3316 // CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]]
3317 // CHECK-32-EX: omp.precond.end:
3318 // CHECK-32-EX-NEXT: ret void
3321 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
3322 // CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
3323 // CHECK-32-EX-NEXT: entry:
3324 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
3325 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
3326 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3327 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3328 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
3329 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
3330 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
3331 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
3332 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
3333 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3334 // CHECK-32-EX: user_code.entry:
3335 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
3336 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
3337 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
3338 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
3339 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
3340 // CHECK-32-EX-NEXT: ret void
3341 // CHECK-32-EX: worker.exit:
3342 // CHECK-32-EX-NEXT: ret void
3345 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
3346 // CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
3347 // CHECK-32-EX-NEXT: entry:
3348 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
3349 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
3350 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
3351 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
3352 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
3353 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
3354 // CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
3355 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
3356 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3357 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
3358 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
3359 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
3360 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
3361 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
3362 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
3363 // CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
3364 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
3365 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3366 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3367 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3368 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
3369 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
3370 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
3371 // CHECK-32-EX: omp.dispatch.cond:
3372 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3373 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
3374 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3375 // CHECK-32-EX: cond.true:
3376 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
3377 // CHECK-32-EX: cond.false:
3378 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3379 // CHECK-32-EX-NEXT: br label [[COND_END]]
3380 // CHECK-32-EX: cond.end:
3381 // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
3382 // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
3383 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3384 // CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
3385 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
3386 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3387 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
3388 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
3389 // CHECK-32-EX: omp.dispatch.body:
3390 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3391 // CHECK-32-EX: omp.inner.for.cond:
3392 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
3393 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
3394 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
3395 // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3396 // CHECK-32-EX: omp.inner.for.body:
3397 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
3398 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
3399 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
3400 // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
3401 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
3402 // CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
3403 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
3404 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
3405 // CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
3406 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3407 // CHECK-32-EX: omp.body.continue:
3408 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3409 // CHECK-32-EX: omp.inner.for.inc:
3410 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
3411 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
3412 // CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
3413 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
3414 // CHECK-32-EX: omp.inner.for.end:
3415 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
3416 // CHECK-32-EX: omp.dispatch.inc:
3417 // CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3418 // CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3419 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
3420 // CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
3421 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3422 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3423 // CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
3424 // CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
3425 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
3426 // CHECK-32-EX: omp.dispatch.end:
3427 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
3428 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
3429 // CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
3430 // CHECK-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
3431 // CHECK-32-EX: .omp.final.then:
3432 // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
3433 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
3434 // CHECK-32-EX: .omp.final.done:
3435 // CHECK-32-EX-NEXT: ret void
3438 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
3439 // CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
3440 // CHECK-32-EX-NEXT: entry:
3441 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
3442 // CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
3443 // CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
3444 // CHECK-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
3445 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3446 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3447 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
3448 // CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
3449 // CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
3450 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
3451 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
3452 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
3453 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3454 // CHECK-32-EX: user_code.entry:
3455 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
3456 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
3457 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
3458 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
3459 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
3460 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
3461 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
3462 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
3463 // CHECK-32-EX-NEXT: ret void
3464 // CHECK-32-EX: worker.exit:
3465 // CHECK-32-EX-NEXT: ret void
3468 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
3469 // CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
3470 // CHECK-32-EX-NEXT: entry:
3471 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
3472 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
3473 // CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
3474 // CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
3475 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
3476 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
3477 // CHECK-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
3478 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
3479 // CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
3480 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
3481 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3482 // CHECK-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4
3483 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
3484 // CHECK-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4
3485 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
3486 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
3487 // CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
3488 // CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
3489 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
3490 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
3491 // CHECK-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
3492 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
3493 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3494 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3495 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
3496 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
3497 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
3498 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
3499 // CHECK-32-EX: omp.dispatch.cond:
3500 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3501 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
3502 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3503 // CHECK-32-EX: cond.true:
3504 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
3505 // CHECK-32-EX: cond.false:
3506 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3507 // CHECK-32-EX-NEXT: br label [[COND_END]]
3508 // CHECK-32-EX: cond.end:
3509 // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
3510 // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
3511 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3512 // CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
3513 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
3514 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3515 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
3516 // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
3517 // CHECK-32-EX: omp.dispatch.body:
3518 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3519 // CHECK-32-EX: omp.inner.for.cond:
3520 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
3521 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
3522 // CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
3523 // CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3524 // CHECK-32-EX: omp.inner.for.body:
3525 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
3526 // CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
3527 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
3528 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
3529 // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
3530 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
3531 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
3532 // CHECK-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
3533 // CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
3534 // CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
3535 // CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
3536 // CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
3537 // CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
3538 // CHECK-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
3539 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
3540 // CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
3541 // CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
3542 // CHECK-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
3543 // CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
3544 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
3545 // CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
3546 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
3547 // CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
3548 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
3549 // CHECK-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
3550 // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
3551 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3552 // CHECK-32-EX: omp.body.continue:
3553 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3554 // CHECK-32-EX: omp.inner.for.inc:
3555 // CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
3556 // CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
3557 // CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
3558 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
3559 // CHECK-32-EX: omp.inner.for.end:
3560 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
3561 // CHECK-32-EX: omp.dispatch.inc:
3562 // CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
3563 // CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3564 // CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
3565 // CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
3566 // CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
3567 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
3568 // CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
3569 // CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
3570 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
3571 // CHECK-32-EX: omp.dispatch.end:
3572 // CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
3573 // CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
3574 // CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
3575 // CHECK-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
3576 // CHECK-32-EX: .omp.final.then:
3577 // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
3578 // CHECK-32-EX-NEXT: store i32 10, ptr [[J]], align 4
3579 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
3580 // CHECK-32-EX: .omp.final.done:
3581 // CHECK-32-EX-NEXT: ret void