1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
2 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU1
3 ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX1
4 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU2
5 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU3
6 ; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX2
7 ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX3
12 ;; void unknown_pure(void) __attribute__((pure));
13 ;; [[omp::assume("omp_no_openmp")]] void unknown_no_openmp(void);
16 ;; void no_parallel_region_in_here(void) {
21 ;; void no_state_machine_needed() {
22 ;; #pragma omp target teams
24 ;; no_parallel_region_in_here();
25 ;; unknown_no_openmp();
29 ;; void simple_state_machine() {
30 ;; #pragma omp target teams
32 ;; unknown_no_openmp();
33 ;; #pragma omp parallel
35 ;; no_parallel_region_in_here();
36 ;; #pragma omp parallel
41 ;; void simple_state_machine_interprocedural_after(void);
42 ;; void simple_state_machine_interprocedural_before(void) {
43 ;; #pragma omp parallel
46 ;; void simple_state_machine_interprocedural() {
47 ;; #pragma omp target teams
49 ;; unknown_no_openmp();
50 ;; simple_state_machine_interprocedural_before();
51 ;; no_parallel_region_in_here();
52 ;; #pragma omp parallel
54 ;; simple_state_machine_interprocedural_after();
57 ;; void simple_state_machine_interprocedural_after(void) {
58 ;; #pragma omp parallel
62 ;; void simple_state_machine_with_fallback() {
63 ;; #pragma omp target teams
65 ;; #pragma omp parallel
68 ;; #pragma omp parallel
73 ;; void simple_state_machine_no_openmp_attr() {
74 ;; #pragma omp target teams
76 ;; #pragma omp parallel
78 ;; unknown_no_openmp();
79 ;; #pragma omp parallel
84 ;; void simple_state_machine_pure() {
85 ;; #pragma omp target teams
87 ;; unknown_no_openmp();
88 ;; #pragma omp parallel
91 ;; #pragma omp parallel
96 ;; int omp_get_thread_num();
97 ;; void simple_state_machine_interprocedural_nested_recursive_after(int);
98 ;; void simple_state_machine_interprocedural_nested_recursive_after_after(void);
99 ;; void simple_state_machine_interprocedural_nested_recursive() {
100 ;; #pragma omp target teams
102 ;; simple_state_machine_interprocedural_nested_recursive_after(
103 ;; omp_get_thread_num());
107 ;; void simple_state_machine_interprocedural_nested_recursive_after(int a) {
110 ;; simple_state_machine_interprocedural_nested_recursive_after(a - 1);
111 ;; simple_state_machine_interprocedural_nested_recursive_after_after();
113 ;; void simple_state_machine_interprocedural_nested_recursive_after_after(void) {
114 ;; #pragma omp parallel
118 ;; __attribute__((weak)) void weak_callee_empty(void) {}
119 ;; void no_state_machine_weak_callee() {
120 ;; #pragma omp target teams
121 ;; { weak_callee_empty(); }
124 %struct.ident_t = type { i32, i32, i32, i32, ptr }
125 %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
126 %struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
128 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
129 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
130 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
131 @G = external global i32, align 4
132 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
133 @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
134 @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
135 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
136 @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
137 @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
138 @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
139 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
140 @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
142 define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
144 %.zero.addr = alloca i32, align 4
145 %.threadid_temp. = alloca i32, align 4
146 store i32 0, ptr %.zero.addr, align 4
147 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn)
148 %exec_user_code = icmp eq i32 %0, -1
149 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
151 user_code.entry: ; preds = %entry
152 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
153 store i32 %1, ptr %.threadid_temp., align 4
154 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
155 call void @__kmpc_target_deinit()
158 worker.exit: ; preds = %entry
162 ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO.
; Takes the kernel-environment pointer and the dynamic-environment pointer, so
; the declared arity matches the two-argument call sites in every kernel entry
; (e.g. call i32 @__kmpc_target_init(ptr @..._kernel_environment, ptr %dyn)).
163 declare i32 @__kmpc_target_init(ptr, ptr);
165 define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
167 %.global_tid..addr = alloca ptr, align 8
168 %.bound_tid..addr = alloca ptr, align 8
169 store ptr %.global_tid., ptr %.global_tid..addr, align 8
170 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
171 call void @no_parallel_region_in_here() #7
172 call void @unknown_no_openmp() #8
176 define hidden void @no_parallel_region_in_here() #1 {
178 %0 = call i32 @__kmpc_global_thread_num(ptr @2)
179 %1 = call i32 @__kmpc_single(ptr @2, i32 %0)
180 %2 = icmp ne i32 %1, 0
181 br i1 %2, label %omp_if.then, label %omp_if.end
183 omp_if.then: ; preds = %entry
184 store i32 0, ptr @G, align 4
185 call void @__kmpc_end_single(ptr @2, i32 %0)
188 omp_if.end: ; preds = %omp_if.then, %entry
189 call void @__kmpc_barrier(ptr @3, i32 %0)
; External callee annotated via attribute group #2 with
; "llvm.assume"="omp_no_openmp", i.e. assumed to contain no OpenMP constructs.
193 declare void @unknown_no_openmp() #2
; OpenMP device runtime: returns the global thread id for the given ident_t.
195 declare i32 @__kmpc_global_thread_num(ptr) #3
; OpenMP device runtime: tears down target-region state set up by target_init.
197 declare void @__kmpc_target_deinit()
199 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
201 %.zero.addr = alloca i32, align 4
202 %.threadid_temp. = alloca i32, align 4
203 store i32 0, ptr %.zero.addr, align 4
204 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn)
205 %exec_user_code = icmp eq i32 %0, -1
206 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
208 user_code.entry: ; preds = %entry
209 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
210 store i32 %1, ptr %.threadid_temp., align 4
211 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
212 call void @__kmpc_target_deinit()
215 worker.exit: ; preds = %entry
219 define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
221 %.global_tid..addr = alloca ptr, align 8
222 %.bound_tid..addr = alloca ptr, align 8
223 %captured_vars_addrs = alloca [0 x ptr], align 8
224 %captured_vars_addrs1 = alloca [0 x ptr], align 8
225 store ptr %.global_tid., ptr %.global_tid..addr, align 8
226 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
227 call void @unknown_no_openmp() #8
228 %0 = load ptr, ptr %.global_tid..addr, align 8
229 %1 = load i32, ptr %0, align 4
230 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
231 call void @no_parallel_region_in_here() #7
232 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
236 define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
238 %.global_tid..addr = alloca ptr, align 8
239 %.bound_tid..addr = alloca ptr, align 8
240 store ptr %.global_tid., ptr %.global_tid..addr, align 8
241 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
246 declare void @p0() #4
248 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 {
250 %.addr = alloca i16, align 2
251 %.addr1 = alloca i32, align 4
252 %.zero.addr = alloca i32, align 4
253 %global_args = alloca ptr, align 8
254 store i32 0, ptr %.zero.addr, align 4
255 store i16 %0, ptr %.addr, align 2
256 store i32 %1, ptr %.addr1, align 4
257 call void @__kmpc_get_shared_variables(ptr %global_args)
258 call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
; OpenMP device runtime: fills the pointed-to slot with the shared-variable
; array for the current parallel region (called from the *_wrapper functions).
262 declare void @__kmpc_get_shared_variables(ptr)
; OpenMP device runtime parallel entry point. Call sites here pass
; (ident, tid, if_expr=1, num_threads=-1, proc_bind=-1, outlined fn,
;  wrapper fn, captured-vars array, nargs=0) — argument meanings per the
; device runtime interface; confirm against DeviceRTL headers if extending.
264 declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
266 define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
268 %.global_tid..addr = alloca ptr, align 8
269 %.bound_tid..addr = alloca ptr, align 8
270 store ptr %.global_tid., ptr %.global_tid..addr, align 8
271 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
276 declare void @p1() #4
278 define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
280 %.addr = alloca i16, align 2
281 %.addr1 = alloca i32, align 4
282 %.zero.addr = alloca i32, align 4
283 %global_args = alloca ptr, align 8
284 store i32 0, ptr %.zero.addr, align 4
285 store i16 %0, ptr %.addr, align 2
286 store i32 %1, ptr %.addr1, align 4
287 call void @__kmpc_get_shared_variables(ptr %global_args)
288 call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
292 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
294 %.zero.addr = alloca i32, align 4
295 %.threadid_temp. = alloca i32, align 4
296 store i32 0, ptr %.zero.addr, align 4
297 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn)
298 %exec_user_code = icmp eq i32 %0, -1
299 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
301 user_code.entry: ; preds = %entry
302 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
303 store i32 %1, ptr %.threadid_temp., align 4
304 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
305 call void @__kmpc_target_deinit()
308 worker.exit: ; preds = %entry
312 define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
314 %.global_tid..addr = alloca ptr, align 8
315 %.bound_tid..addr = alloca ptr, align 8
316 %captured_vars_addrs = alloca [0 x ptr], align 8
317 store ptr %.global_tid., ptr %.global_tid..addr, align 8
318 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
319 call void @unknown_no_openmp() #8
320 call void @simple_state_machine_interprocedural_before() #7
321 call void @no_parallel_region_in_here() #7
322 %0 = load ptr, ptr %.global_tid..addr, align 8
323 %1 = load i32, ptr %0, align 4
324 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
325 call void @simple_state_machine_interprocedural_after() #7
329 define hidden void @simple_state_machine_interprocedural_before() #1 {
331 %captured_vars_addrs = alloca [0 x ptr], align 8
332 %0 = call i32 @__kmpc_global_thread_num(ptr @2)
333 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
337 define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
339 %.global_tid..addr = alloca ptr, align 8
340 %.bound_tid..addr = alloca ptr, align 8
341 store ptr %.global_tid., ptr %.global_tid..addr, align 8
342 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
347 define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
349 %.addr = alloca i16, align 2
350 %.addr1 = alloca i32, align 4
351 %.zero.addr = alloca i32, align 4
352 %global_args = alloca ptr, align 8
353 store i32 0, ptr %.zero.addr, align 4
354 store i16 %0, ptr %.addr, align 2
355 store i32 %1, ptr %.addr1, align 4
356 call void @__kmpc_get_shared_variables(ptr %global_args)
357 call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
361 define hidden void @simple_state_machine_interprocedural_after() #1 {
363 %captured_vars_addrs = alloca [0 x ptr], align 8
364 %0 = call i32 @__kmpc_global_thread_num(ptr @2)
365 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
369 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
371 %.zero.addr = alloca i32, align 4
372 %.threadid_temp. = alloca i32, align 4
373 store i32 0, ptr %.zero.addr, align 4
374 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn)
375 %exec_user_code = icmp eq i32 %0, -1
376 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
378 user_code.entry: ; preds = %entry
379 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
380 store i32 %1, ptr %.threadid_temp., align 4
381 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
382 call void @__kmpc_target_deinit()
385 worker.exit: ; preds = %entry
389 define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
391 %.global_tid..addr = alloca ptr, align 8
392 %.bound_tid..addr = alloca ptr, align 8
393 %captured_vars_addrs = alloca [0 x ptr], align 8
394 %captured_vars_addrs1 = alloca [0 x ptr], align 8
395 store ptr %.global_tid., ptr %.global_tid..addr, align 8
396 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
397 %0 = load ptr, ptr %.global_tid..addr, align 8
398 %1 = load i32, ptr %0, align 4
399 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
400 %call = call i32 @unknown() #7
401 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
405 define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
407 %.global_tid..addr = alloca ptr, align 8
408 %.bound_tid..addr = alloca ptr, align 8
409 store ptr %.global_tid., ptr %.global_tid..addr, align 8
410 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
415 define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
417 %.addr = alloca i16, align 2
418 %.addr1 = alloca i32, align 4
419 %.zero.addr = alloca i32, align 4
420 %global_args = alloca ptr, align 8
421 store i32 0, ptr %.zero.addr, align 4
422 store i16 %0, ptr %.addr, align 2
423 store i32 %1, ptr %.addr1, align 4
424 call void @__kmpc_get_shared_variables(ptr %global_args)
425 call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
429 declare i32 @unknown() #4
431 define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
433 %.global_tid..addr = alloca ptr, align 8
434 %.bound_tid..addr = alloca ptr, align 8
435 store ptr %.global_tid., ptr %.global_tid..addr, align 8
436 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
441 define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 {
443 %.addr = alloca i16, align 2
444 %.addr1 = alloca i32, align 4
445 %.zero.addr = alloca i32, align 4
446 %global_args = alloca ptr, align 8
447 store i32 0, ptr %.zero.addr, align 4
448 store i16 %0, ptr %.addr, align 2
449 store i32 %1, ptr %.addr1, align 4
450 call void @__kmpc_get_shared_variables(ptr %global_args)
451 call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
455 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
457 %.zero.addr = alloca i32, align 4
458 %.threadid_temp. = alloca i32, align 4
459 store i32 0, ptr %.zero.addr, align 4
460 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn)
461 %exec_user_code = icmp eq i32 %0, -1
462 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
464 user_code.entry: ; preds = %entry
465 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
466 store i32 %1, ptr %.threadid_temp., align 4
467 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
468 call void @__kmpc_target_deinit()
471 worker.exit: ; preds = %entry
475 define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
477 %.global_tid..addr = alloca ptr, align 8
478 %.bound_tid..addr = alloca ptr, align 8
479 %captured_vars_addrs = alloca [0 x ptr], align 8
480 %captured_vars_addrs1 = alloca [0 x ptr], align 8
481 store ptr %.global_tid., ptr %.global_tid..addr, align 8
482 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
483 %0 = load ptr, ptr %.global_tid..addr, align 8
484 %1 = load i32, ptr %0, align 4
485 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
486 call void @unknown_no_openmp() #8
487 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
491 define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
493 %.global_tid..addr = alloca ptr, align 8
494 %.bound_tid..addr = alloca ptr, align 8
495 store ptr %.global_tid., ptr %.global_tid..addr, align 8
496 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
501 define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 {
503 %.addr = alloca i16, align 2
504 %.addr1 = alloca i32, align 4
505 %.zero.addr = alloca i32, align 4
506 %global_args = alloca ptr, align 8
507 store i32 0, ptr %.zero.addr, align 4
508 store i16 %0, ptr %.addr, align 2
509 store i32 %1, ptr %.addr1, align 4
510 call void @__kmpc_get_shared_variables(ptr %global_args)
511 call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
515 define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
517 %.global_tid..addr = alloca ptr, align 8
518 %.bound_tid..addr = alloca ptr, align 8
519 store ptr %.global_tid., ptr %.global_tid..addr, align 8
520 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
525 define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 {
527 %.addr = alloca i16, align 2
528 %.addr1 = alloca i32, align 4
529 %.zero.addr = alloca i32, align 4
530 %global_args = alloca ptr, align 8
531 store i32 0, ptr %.zero.addr, align 4
532 store i16 %0, ptr %.addr, align 2
533 store i32 %1, ptr %.addr1, align 4
534 call void @__kmpc_get_shared_variables(ptr %global_args)
535 call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
539 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
541 %.zero.addr = alloca i32, align 4
542 %.threadid_temp. = alloca i32, align 4
543 store i32 0, ptr %.zero.addr, align 4
544 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn)
545 %exec_user_code = icmp eq i32 %0, -1
546 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
548 user_code.entry: ; preds = %entry
549 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
550 store i32 %1, ptr %.threadid_temp., align 4
551 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
552 call void @__kmpc_target_deinit()
555 worker.exit: ; preds = %entry
559 define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
561 %.global_tid..addr = alloca ptr, align 8
562 %.bound_tid..addr = alloca ptr, align 8
563 %captured_vars_addrs = alloca [0 x ptr], align 8
564 %captured_vars_addrs1 = alloca [0 x ptr], align 8
565 store ptr %.global_tid., ptr %.global_tid..addr, align 8
566 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
567 call void @unknown_no_openmp() #8
568 %0 = load ptr, ptr %.global_tid..addr, align 8
569 %1 = load i32, ptr %0, align 4
570 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
571 call void @unknown_pure() #9
572 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
576 define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
578 %.global_tid..addr = alloca ptr, align 8
579 %.bound_tid..addr = alloca ptr, align 8
580 store ptr %.global_tid., ptr %.global_tid..addr, align 8
581 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
586 define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 {
588 %.addr = alloca i16, align 2
589 %.addr1 = alloca i32, align 4
590 %.zero.addr = alloca i32, align 4
591 %global_args = alloca ptr, align 8
592 store i32 0, ptr %.zero.addr, align 4
593 store i16 %0, ptr %.addr, align 2
594 store i32 %1, ptr %.addr1, align 4
595 call void @__kmpc_get_shared_variables(ptr %global_args)
596 call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
600 declare void @unknown_pure() #5
602 define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
604 %.global_tid..addr = alloca ptr, align 8
605 %.bound_tid..addr = alloca ptr, align 8
606 store ptr %.global_tid., ptr %.global_tid..addr, align 8
607 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
612 define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 {
614 %.addr = alloca i16, align 2
615 %.addr1 = alloca i32, align 4
616 %.zero.addr = alloca i32, align 4
617 %global_args = alloca ptr, align 8
618 store i32 0, ptr %.zero.addr, align 4
619 store i16 %0, ptr %.addr, align 2
620 store i32 %1, ptr %.addr1, align 4
621 call void @__kmpc_get_shared_variables(ptr %global_args)
622 call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
626 define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
628 %.zero.addr = alloca i32, align 4
629 %.threadid_temp. = alloca i32, align 4
630 store i32 0, ptr %.zero.addr, align 4
631 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn)
632 %exec_user_code = icmp eq i32 %0, -1
633 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
635 user_code.entry: ; preds = %entry
636 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
637 store i32 %1, ptr %.threadid_temp., align 4
638 call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
639 call void @__kmpc_target_deinit()
642 worker.exit: ; preds = %entry
646 define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
648 %.global_tid..addr = alloca ptr, align 8
649 %.bound_tid..addr = alloca ptr, align 8
650 store ptr %.global_tid., ptr %.global_tid..addr, align 8
651 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
652 %call = call i32 @omp_get_thread_num() #7
653 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
657 define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
659 %a.addr = alloca i32, align 4
660 store i32 %a, ptr %a.addr, align 4
661 %0 = load i32, ptr %a.addr, align 4
662 %cmp = icmp eq i32 %0, 0
663 br i1 %cmp, label %if.then, label %if.end
665 if.then: ; preds = %entry
668 if.end: ; preds = %entry
669 %1 = load i32, ptr %a.addr, align 4
670 %sub = sub nsw i32 %1, 1
671 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
672 call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
675 return: ; preds = %if.end, %if.then
; Varargs prototype: the C source declares `int omp_get_thread_num();` with an
; empty (unprototyped) parameter list, which lowers to `(...)` here.
679 declare i32 @omp_get_thread_num(...) #4
681 define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
683 %.zero.addr = alloca i32, align 4
684 %.threadid_temp. = alloca i32, align 4
685 store i32 0, ptr %.zero.addr, align 4
686 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn)
687 %exec_user_code = icmp eq i32 %0, -1
688 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
690 user_code.entry: ; preds = %entry
691 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
692 store i32 %1, ptr %.threadid_temp., align 4
693 call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
694 call void @__kmpc_target_deinit()
697 worker.exit: ; preds = %entry
701 define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
703 %.global_tid..addr = alloca ptr, align 8
704 %.bound_tid..addr = alloca ptr, align 8
705 store ptr %.global_tid., ptr %.global_tid..addr, align 8
706 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
707 call void @weak_callee_empty() #7
711 define weak hidden void @weak_callee_empty() #1 {
; OpenMP device runtime calls used by @no_parallel_region_in_here:
; __kmpc_single returns non-zero for the one thread that executes the
; single region; __kmpc_end_single closes it; __kmpc_barrier synchronizes
; the team afterwards (see the omp_if.then/omp_if.end blocks above).
716 declare i32 @__kmpc_single(ptr, i32) #6
718 declare void @__kmpc_end_single(ptr, i32) #6
720 declare void @__kmpc_barrier(ptr, i32) #6
722 define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
724 %.global_tid..addr = alloca ptr, align 8
725 %.bound_tid..addr = alloca ptr, align 8
726 store ptr %.global_tid., ptr %.global_tid..addr, align 8
727 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
732 define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
734 %.addr = alloca i16, align 2
735 %.addr1 = alloca i32, align 4
736 %.zero.addr = alloca i32, align 4
737 %global_args = alloca ptr, align 8
738 store i32 0, ptr %.zero.addr, align 4
739 store i16 %0, ptr %.addr, align 2
740 store i32 %1, ptr %.addr1, align 4
741 call void @__kmpc_get_shared_variables(ptr %global_args)
742 call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
746 define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
748 %.global_tid..addr = alloca ptr, align 8
749 %.bound_tid..addr = alloca ptr, align 8
750 store ptr %.global_tid., ptr %.global_tid..addr, align 8
751 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
756 define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
758 %.addr = alloca i16, align 2
759 %.addr1 = alloca i32, align 4
760 %.zero.addr = alloca i32, align 4
761 %global_args = alloca ptr, align 8
762 store i32 0, ptr %.zero.addr, align 4
763 store i16 %0, ptr %.addr, align 2
764 store i32 %1, ptr %.addr1, align 4
765 call void @__kmpc_get_shared_variables(ptr %global_args)
766 call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
770 define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
772 %captured_vars_addrs = alloca [0 x ptr], align 8
773 %0 = call i32 @__kmpc_global_thread_num(ptr @2)
774 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
778 define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
780 %.global_tid..addr = alloca ptr, align 8
781 %.bound_tid..addr = alloca ptr, align 8
782 store ptr %.global_tid., ptr %.global_tid..addr, align 8
783 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
788 define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
790 %.addr = alloca i16, align 2
791 %.addr1 = alloca i32, align 4
792 %.zero.addr = alloca i32, align 4
793 %global_args = alloca ptr, align 8
794 store i32 0, ptr %.zero.addr, align 4
795 store i16 %0, ptr %.addr, align 2
796 store i32 %1, ptr %.addr1, align 4
797 call void @__kmpc_get_shared_variables(ptr %global_args)
798 call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
; Attribute groups referenced as #N on the function definitions and call
; sites above. #0 marks the offload kernel entry points ("kernel"); #1 the
; ordinary device-side callees; #2/#8 carry the "omp_no_openmp" assumption
; used by OpenMP-Opt; the rest annotate call sites (nounwind/convergent/
; readonly combinations).
802 attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
803 attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
804 attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
805 attributes #3 = { nounwind }
806 attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
807 attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
808 attributes #6 = { convergent nounwind }
809 attributes #7 = { convergent }
810 attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
811 attributes #9 = { convergent nounwind readonly willreturn }
; Named metadata: !omp_offload.info enumerates the offload entry points
; (each node: flags, file ID, hash, kernel name, source line, entry order);
; llvm.module.flags marks this module as OpenMP device code, version 50.
813 !omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
814 !llvm.module.flags = !{!16, !17, !18}
816 !0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
817 !1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
818 !2 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
819 !3 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
820 !4 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
821 !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
822 !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
823 !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
824 !16 = !{i32 1, !"wchar_size", i32 4}
825 !17 = !{i32 7, !"openmp", i32 50}
826 !18 = !{i32 7, !"openmp-device", i32 50}
828 ; AMDGPU1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
829 ; AMDGPU1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
830 ; AMDGPU1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
831 ; AMDGPU1: @G = external global i32, align 4
832 ; AMDGPU1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
833 ; AMDGPU1: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
834 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
835 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
836 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
837 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
838 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
839 ; AMDGPU1: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
840 ; AMDGPU1: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
842 ; NVPTX1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
843 ; NVPTX1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
844 ; NVPTX1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
845 ; NVPTX1: @G = external global i32, align 4
846 ; NVPTX1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
847 ; NVPTX1: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
848 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
849 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
850 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
851 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
852 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
853 ; NVPTX1: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
854 ; NVPTX1: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
856 ; AMDGPU2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
857 ; AMDGPU2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
858 ; AMDGPU2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
859 ; AMDGPU2: @G = external global i32, align 4
860 ; AMDGPU2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
861 ; AMDGPU2: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
862 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
863 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
864 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
865 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
866 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
867 ; AMDGPU2: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
868 ; AMDGPU2: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
870 ; AMDGPU3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
871 ; AMDGPU3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
872 ; AMDGPU3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
873 ; AMDGPU3: @G = external global i32, align 4
874 ; AMDGPU3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
875 ; AMDGPU3: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
876 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
877 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
878 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
879 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
880 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
881 ; AMDGPU3: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
882 ; AMDGPU3: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
884 ; NVPTX2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
885 ; NVPTX2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
886 ; NVPTX2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
887 ; NVPTX2: @G = external global i32, align 4
888 ; NVPTX2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
889 ; NVPTX2: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
890 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
891 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
892 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
893 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
894 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
895 ; NVPTX2: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
896 ; NVPTX2: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
898 ; NVPTX3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
899 ; NVPTX3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
900 ; NVPTX3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
901 ; NVPTX3: @G = external global i32, align 4
902 ; NVPTX3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
903 ; NVPTX3: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
904 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
905 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
906 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
907 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
908 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
909 ; NVPTX3: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
910 ; NVPTX3: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
912 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
913 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
914 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
915 ; AMDGPU1-NEXT: entry:
916 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
917 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
918 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
919 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
920 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
921 ; AMDGPU1: user_code.entry:
922 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
923 ; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
924 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
925 ; AMDGPU1-NEXT: ret void
926 ; AMDGPU1: worker.exit:
927 ; AMDGPU1-NEXT: ret void
930 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
931 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__
932 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
933 ; AMDGPU1-NEXT: entry:
934 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
935 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
936 ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
937 ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
938 ; AMDGPU1-NEXT: ret void
941 ; AMDGPU1: Function Attrs: convergent noinline nounwind
942 ; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
943 ; AMDGPU1-SAME: () #[[ATTR1:[0-9]+]] {
944 ; AMDGPU1-NEXT: entry:
945 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
946 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
947 ; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
948 ; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
949 ; AMDGPU1: omp_if.then:
950 ; AMDGPU1-NEXT: store i32 0, ptr @G, align 4
951 ; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
952 ; AMDGPU1-NEXT: br label [[OMP_IF_END]]
953 ; AMDGPU1: omp_if.end:
954 ; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
955 ; AMDGPU1-NEXT: ret void
958 ; AMDGPU1: Function Attrs: convergent noinline nounwind
959 ; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
960 ; AMDGPU1-SAME: () #[[ATTR1]] {
961 ; AMDGPU1-NEXT: entry:
962 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
963 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
964 ; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
965 ; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
966 ; AMDGPU1: omp_if.then:
967 ; AMDGPU1-NEXT: store i32 0, ptr @G, align 4
968 ; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
969 ; AMDGPU1-NEXT: br label [[OMP_IF_END]]
970 ; AMDGPU1: omp_if.end:
971 ; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
972 ; AMDGPU1-NEXT: ret void
975 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
976 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
977 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
978 ; AMDGPU1-NEXT: entry:
979 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
980 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
981 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
982 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
983 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
984 ; AMDGPU1: user_code.entry:
985 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
986 ; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
987 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
988 ; AMDGPU1-NEXT: ret void
989 ; AMDGPU1: worker.exit:
990 ; AMDGPU1-NEXT: ret void
993 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
994 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1
995 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
996 ; AMDGPU1-NEXT: entry:
997 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
998 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
999 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1000 ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1001 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1002 ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1003 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1004 ; AMDGPU1-NEXT: ret void
1007 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1008 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2
1009 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1010 ; AMDGPU1-NEXT: entry:
1011 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1012 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1013 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
1014 ; AMDGPU1-NEXT: ret void
1017 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1018 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
1019 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1020 ; AMDGPU1-NEXT: entry:
1021 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1022 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1023 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1024 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1025 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1026 ; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1027 ; AMDGPU1-NEXT: ret void
1030 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1031 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3
1032 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1033 ; AMDGPU1-NEXT: entry:
1034 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1035 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1036 ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
1037 ; AMDGPU1-NEXT: ret void
1040 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1041 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1042 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1043 ; AMDGPU1-NEXT: entry:
1044 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1045 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1046 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1047 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1048 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1049 ; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1050 ; AMDGPU1-NEXT: ret void
1053 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1054 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
1055 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1056 ; AMDGPU1-NEXT: entry:
1057 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1058 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1059 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
1060 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1061 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1062 ; AMDGPU1: user_code.entry:
1063 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1064 ; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1065 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1066 ; AMDGPU1-NEXT: ret void
1067 ; AMDGPU1: worker.exit:
1068 ; AMDGPU1-NEXT: ret void
1071 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1072 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4
1073 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1074 ; AMDGPU1-NEXT: entry:
1075 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1076 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1077 ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1078 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
1079 ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1080 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1081 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
1082 ; AMDGPU1-NEXT: ret void
1085 ; AMDGPU1: Function Attrs: noinline nounwind
1086 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
1087 ; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] {
1088 ; AMDGPU1-NEXT: entry:
1089 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1090 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1091 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1092 ; AMDGPU1-NEXT: ret void
1095 ; AMDGPU1: Function Attrs: convergent noinline nounwind
1096 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
1097 ; AMDGPU1-SAME: () #[[ATTR1]] {
1098 ; AMDGPU1-NEXT: entry:
1099 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1100 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1101 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1102 ; AMDGPU1-NEXT: ret void
1105 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1106 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5
1107 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1108 ; AMDGPU1-NEXT: entry:
1109 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1110 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1111 ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
1112 ; AMDGPU1-NEXT: ret void
1115 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1116 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1117 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1118 ; AMDGPU1-NEXT: entry:
1119 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1120 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1121 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1122 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1123 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1124 ; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1125 ; AMDGPU1-NEXT: ret void
1128 ; AMDGPU1: Function Attrs: noinline nounwind
1129 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
1130 ; AMDGPU1-SAME: () #[[ATTR6]] {
1131 ; AMDGPU1-NEXT: entry:
1132 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1133 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1134 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1135 ; AMDGPU1-NEXT: ret void
1138 ; AMDGPU1: Function Attrs: convergent noinline nounwind
1139 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
1140 ; AMDGPU1-SAME: () #[[ATTR1]] {
1141 ; AMDGPU1-NEXT: entry:
1142 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1143 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1144 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1145 ; AMDGPU1-NEXT: ret void
1148 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1149 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
1150 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1151 ; AMDGPU1-NEXT: entry:
1152 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1153 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1154 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
1155 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1156 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1157 ; AMDGPU1: user_code.entry:
1158 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1159 ; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1160 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1161 ; AMDGPU1-NEXT: ret void
1162 ; AMDGPU1: worker.exit:
1163 ; AMDGPU1-NEXT: ret void
1166 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1167 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6
1168 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1169 ; AMDGPU1-NEXT: entry:
1170 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1171 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1172 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1173 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1174 ; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
1175 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1176 ; AMDGPU1-NEXT: ret void
1179 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1180 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7
1181 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1182 ; AMDGPU1-NEXT: entry:
1183 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1184 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1185 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1186 ; AMDGPU1-NEXT: ret void
1189 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1190 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
1191 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1192 ; AMDGPU1-NEXT: entry:
1193 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1194 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1195 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1196 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1197 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1198 ; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1199 ; AMDGPU1-NEXT: ret void
1202 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1203 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8
1204 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1205 ; AMDGPU1-NEXT: entry:
1206 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1207 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1208 ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
1209 ; AMDGPU1-NEXT: ret void
1212 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1213 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
1214 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1215 ; AMDGPU1-NEXT: entry:
1216 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1217 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1218 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1219 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1220 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1221 ; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1222 ; AMDGPU1-NEXT: ret void
1225 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1226 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
1227 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1228 ; AMDGPU1-NEXT: entry:
1229 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1230 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1231 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
1232 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1233 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1234 ; AMDGPU1: user_code.entry:
1235 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1236 ; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1237 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1238 ; AMDGPU1-NEXT: ret void
1239 ; AMDGPU1: worker.exit:
1240 ; AMDGPU1-NEXT: ret void
1243 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1244 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9
1245 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1246 ; AMDGPU1-NEXT: entry:
1247 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1248 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1249 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1250 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1251 ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1252 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1253 ; AMDGPU1-NEXT: ret void
1256 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1257 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10
1258 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1259 ; AMDGPU1-NEXT: entry:
1260 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1261 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1262 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1263 ; AMDGPU1-NEXT: ret void
1266 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1267 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
1268 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1269 ; AMDGPU1-NEXT: entry:
1270 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1271 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1272 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1273 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1274 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1275 ; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1276 ; AMDGPU1-NEXT: ret void
1279 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1280 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11
1281 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1282 ; AMDGPU1-NEXT: entry:
1283 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1284 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1285 ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
1286 ; AMDGPU1-NEXT: ret void
1289 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1290 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
1291 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1292 ; AMDGPU1-NEXT: entry:
1293 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1294 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1295 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1296 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1297 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1298 ; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1299 ; AMDGPU1-NEXT: ret void
1302 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1303 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
1304 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1305 ; AMDGPU1-NEXT: entry:
1306 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1307 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1308 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
1309 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1310 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1311 ; AMDGPU1: user_code.entry:
1312 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1313 ; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1314 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1315 ; AMDGPU1-NEXT: ret void
1316 ; AMDGPU1: worker.exit:
1317 ; AMDGPU1-NEXT: ret void
1320 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1321 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12
1322 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1323 ; AMDGPU1-NEXT: entry:
1324 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1325 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1326 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1327 ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1328 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1329 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1330 ; AMDGPU1-NEXT: ret void
1333 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1334 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13
1335 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1336 ; AMDGPU1-NEXT: entry:
1337 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1338 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1339 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1340 ; AMDGPU1-NEXT: ret void
1343 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1344 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
1345 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1346 ; AMDGPU1-NEXT: entry:
1347 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1348 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1349 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1350 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1351 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1352 ; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1353 ; AMDGPU1-NEXT: ret void
1356 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1357 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14
1358 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1359 ; AMDGPU1-NEXT: entry:
1360 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1361 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1362 ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
1363 ; AMDGPU1-NEXT: ret void
1366 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1367 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
1368 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1369 ; AMDGPU1-NEXT: entry:
1370 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1371 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1372 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1373 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1374 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1375 ; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1376 ; AMDGPU1-NEXT: ret void
1379 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1380 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
1381 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1382 ; AMDGPU1-NEXT: entry:
1383 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1384 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1385 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
1386 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1387 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1388 ; AMDGPU1: user_code.entry:
1389 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1390 ; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1391 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1392 ; AMDGPU1-NEXT: ret void
1393 ; AMDGPU1: worker.exit:
1394 ; AMDGPU1-NEXT: ret void
1397 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1398 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15
1399 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1400 ; AMDGPU1-NEXT: entry:
1401 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1402 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1403 ; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
1404 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
1405 ; AMDGPU1-NEXT: ret void
1408 ; AMDGPU1: Function Attrs: noinline nounwind
1409 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
1410 ; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
1411 ; AMDGPU1-NEXT: entry:
1412 ; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
1413 ; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
1414 ; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
1415 ; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
1416 ; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
1418 ; AMDGPU1-NEXT: br label [[RETURN:%.*]]
1420 ; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1421 ; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
1422 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
1423 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
1424 ; AMDGPU1-NEXT: br label [[RETURN]]
1426 ; AMDGPU1-NEXT: ret void
1429 ; AMDGPU1: Function Attrs: convergent noinline nounwind
1430 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
1431 ; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
1432 ; AMDGPU1-NEXT: entry:
1433 ; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
1434 ; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
1435 ; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
1436 ; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
1437 ; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
1439 ; AMDGPU1-NEXT: br label [[RETURN:%.*]]
1441 ; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1442 ; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
1443 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
1444 ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
1445 ; AMDGPU1-NEXT: br label [[RETURN]]
1447 ; AMDGPU1-NEXT: ret void
1450 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1451 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
1452 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1453 ; AMDGPU1-NEXT: entry:
1454 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1455 ; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1456 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
1457 ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1458 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1459 ; AMDGPU1: user_code.entry:
1460 ; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1461 ; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1462 ; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
1463 ; AMDGPU1-NEXT: ret void
1464 ; AMDGPU1: worker.exit:
1465 ; AMDGPU1-NEXT: ret void
1468 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1469 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16
1470 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1471 ; AMDGPU1-NEXT: entry:
1472 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1473 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1474 ; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
1475 ; AMDGPU1-NEXT: ret void
1478 ; AMDGPU1: Function Attrs: convergent noinline nounwind
1479 ; AMDGPU1-LABEL: define {{[^@]+}}@weak_callee_empty
1480 ; AMDGPU1-SAME: () #[[ATTR1]] {
1481 ; AMDGPU1-NEXT: entry:
1482 ; AMDGPU1-NEXT: ret void
1485 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1486 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17
1487 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1488 ; AMDGPU1-NEXT: entry:
1489 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1490 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1491 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1492 ; AMDGPU1-NEXT: ret void
1495 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1496 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
1497 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1498 ; AMDGPU1-NEXT: entry:
1499 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1500 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1501 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1502 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1503 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1504 ; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1505 ; AMDGPU1-NEXT: ret void
1508 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1509 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18
1510 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1511 ; AMDGPU1-NEXT: entry:
1512 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1513 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1514 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1515 ; AMDGPU1-NEXT: ret void
1518 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1519 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
1520 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1521 ; AMDGPU1-NEXT: entry:
1522 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1523 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1524 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1525 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1526 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1527 ; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1528 ; AMDGPU1-NEXT: ret void
1531 ; AMDGPU1: Function Attrs: noinline nounwind
1532 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
1533 ; AMDGPU1-SAME: () #[[ATTR6]] {
1534 ; AMDGPU1-NEXT: entry:
1535 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1536 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1537 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1538 ; AMDGPU1-NEXT: ret void
1541 ; AMDGPU1: Function Attrs: convergent noinline nounwind
1542 ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
1543 ; AMDGPU1-SAME: () #[[ATTR1]] {
1544 ; AMDGPU1-NEXT: entry:
1545 ; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1546 ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1547 ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1548 ; AMDGPU1-NEXT: ret void
1551 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1552 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19
1553 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1554 ; AMDGPU1-NEXT: entry:
1555 ; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1556 ; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1557 ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
1558 ; AMDGPU1-NEXT: ret void
1561 ; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
1562 ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
1563 ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1564 ; AMDGPU1-NEXT: entry:
1565 ; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1566 ; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1567 ; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1568 ; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1569 ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1570 ; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1571 ; AMDGPU1-NEXT: ret void
1574 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1575 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
1576 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
1577 ; NVPTX1-NEXT: entry:
1578 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1579 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1580 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
1581 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1582 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1583 ; NVPTX1: user_code.entry:
1584 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
1585 ; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1586 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1587 ; NVPTX1-NEXT: ret void
1588 ; NVPTX1: worker.exit:
1589 ; NVPTX1-NEXT: ret void
1592 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1593 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__
1594 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1595 ; NVPTX1-NEXT: entry:
1596 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1597 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1598 ; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
1599 ; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
1600 ; NVPTX1-NEXT: ret void
1603 ; NVPTX1: Function Attrs: convergent noinline nounwind
1604 ; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
1605 ; NVPTX1-SAME: () #[[ATTR1:[0-9]+]] {
1606 ; NVPTX1-NEXT: entry:
1607 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1608 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
1609 ; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
1610 ; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
1611 ; NVPTX1: omp_if.then:
1612 ; NVPTX1-NEXT: store i32 0, ptr @G, align 4
1613 ; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
1614 ; NVPTX1-NEXT: br label [[OMP_IF_END]]
1615 ; NVPTX1: omp_if.end:
1616 ; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
1617 ; NVPTX1-NEXT: ret void
1620 ; NVPTX1: Function Attrs: convergent noinline nounwind
1621 ; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
1622 ; NVPTX1-SAME: () #[[ATTR1]] {
1623 ; NVPTX1-NEXT: entry:
1624 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1625 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
1626 ; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
1627 ; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
1628 ; NVPTX1: omp_if.then:
1629 ; NVPTX1-NEXT: store i32 0, ptr @G, align 4
1630 ; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
1631 ; NVPTX1-NEXT: br label [[OMP_IF_END]]
1632 ; NVPTX1: omp_if.end:
1633 ; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
1634 ; NVPTX1-NEXT: ret void
1637 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1638 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
1639 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1640 ; NVPTX1-NEXT: entry:
1641 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1642 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1643 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
1644 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1645 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1646 ; NVPTX1: user_code.entry:
1647 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1648 ; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1649 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1650 ; NVPTX1-NEXT: ret void
1651 ; NVPTX1: worker.exit:
1652 ; NVPTX1-NEXT: ret void
1655 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1656 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1
1657 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1658 ; NVPTX1-NEXT: entry:
1659 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1660 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1661 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1662 ; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1663 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1664 ; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1665 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1666 ; NVPTX1-NEXT: ret void
1669 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1670 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2
1671 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1672 ; NVPTX1-NEXT: entry:
1673 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1674 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1675 ; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
1676 ; NVPTX1-NEXT: ret void
1679 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1680 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
1681 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1682 ; NVPTX1-NEXT: entry:
1683 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1684 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1685 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1686 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1687 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1688 ; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1689 ; NVPTX1-NEXT: ret void
1692 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1693 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3
1694 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1695 ; NVPTX1-NEXT: entry:
1696 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1697 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1698 ; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
1699 ; NVPTX1-NEXT: ret void
1702 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1703 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1704 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1705 ; NVPTX1-NEXT: entry:
1706 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1707 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1708 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1709 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1710 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1711 ; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1712 ; NVPTX1-NEXT: ret void
1715 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1716 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
1717 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1718 ; NVPTX1-NEXT: entry:
1719 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1720 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1721 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
1722 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1723 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1724 ; NVPTX1: user_code.entry:
1725 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1726 ; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1727 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1728 ; NVPTX1-NEXT: ret void
1729 ; NVPTX1: worker.exit:
1730 ; NVPTX1-NEXT: ret void
1733 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1734 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4
1735 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1736 ; NVPTX1-NEXT: entry:
1737 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1738 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1739 ; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1740 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
1741 ; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1742 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1743 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
1744 ; NVPTX1-NEXT: ret void
1747 ; NVPTX1: Function Attrs: noinline nounwind
1748 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
1749 ; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] {
1750 ; NVPTX1-NEXT: entry:
1751 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1752 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1753 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1754 ; NVPTX1-NEXT: ret void
1757 ; NVPTX1: Function Attrs: convergent noinline nounwind
1758 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
1759 ; NVPTX1-SAME: () #[[ATTR1]] {
1760 ; NVPTX1-NEXT: entry:
1761 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1762 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1763 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1764 ; NVPTX1-NEXT: ret void
1767 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1768 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5
1769 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1770 ; NVPTX1-NEXT: entry:
1771 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1772 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1773 ; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
1774 ; NVPTX1-NEXT: ret void
1777 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1778 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1779 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1780 ; NVPTX1-NEXT: entry:
1781 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1782 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1783 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1784 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1785 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1786 ; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1787 ; NVPTX1-NEXT: ret void
1790 ; NVPTX1: Function Attrs: noinline nounwind
1791 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
1792 ; NVPTX1-SAME: () #[[ATTR6]] {
1793 ; NVPTX1-NEXT: entry:
1794 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1795 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1796 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1797 ; NVPTX1-NEXT: ret void
1800 ; NVPTX1: Function Attrs: convergent noinline nounwind
1801 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
1802 ; NVPTX1-SAME: () #[[ATTR1]] {
1803 ; NVPTX1-NEXT: entry:
1804 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1805 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1806 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1807 ; NVPTX1-NEXT: ret void
1810 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1811 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
1812 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1813 ; NVPTX1-NEXT: entry:
1814 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1815 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1816 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
1817 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1818 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1819 ; NVPTX1: user_code.entry:
1820 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1821 ; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1822 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1823 ; NVPTX1-NEXT: ret void
1824 ; NVPTX1: worker.exit:
1825 ; NVPTX1-NEXT: ret void
1828 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1829 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6
1830 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1831 ; NVPTX1-NEXT: entry:
1832 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1833 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1834 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1835 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1836 ; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
1837 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1838 ; NVPTX1-NEXT: ret void
1841 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1842 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7
1843 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1844 ; NVPTX1-NEXT: entry:
1845 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1846 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1847 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
1848 ; NVPTX1-NEXT: ret void
1851 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1852 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
1853 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1854 ; NVPTX1-NEXT: entry:
1855 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1856 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1857 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1858 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1859 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1860 ; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1861 ; NVPTX1-NEXT: ret void
1864 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1865 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8
1866 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1867 ; NVPTX1-NEXT: entry:
1868 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1869 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1870 ; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
1871 ; NVPTX1-NEXT: ret void
1874 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1875 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
1876 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1877 ; NVPTX1-NEXT: entry:
1878 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1879 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1880 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1881 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1882 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1883 ; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1884 ; NVPTX1-NEXT: ret void
1887 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1888 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
1889 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1890 ; NVPTX1-NEXT: entry:
1891 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1892 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1893 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
1894 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1895 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1896 ; NVPTX1: user_code.entry:
1897 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1898 ; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1899 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1900 ; NVPTX1-NEXT: ret void
1901 ; NVPTX1: worker.exit:
1902 ; NVPTX1-NEXT: ret void
1905 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1906 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9
1907 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1908 ; NVPTX1-NEXT: entry:
1909 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1910 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1911 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1912 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1913 ; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1914 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1915 ; NVPTX1-NEXT: ret void
1918 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1919 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10
1920 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1921 ; NVPTX1-NEXT: entry:
1922 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1923 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1924 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
1925 ; NVPTX1-NEXT: ret void
1928 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1929 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
1930 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1931 ; NVPTX1-NEXT: entry:
1932 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1933 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1934 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1935 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1936 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1937 ; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1938 ; NVPTX1-NEXT: ret void
1941 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1942 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11
1943 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1944 ; NVPTX1-NEXT: entry:
1945 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1946 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1947 ; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
1948 ; NVPTX1-NEXT: ret void
1951 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1952 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
1953 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1954 ; NVPTX1-NEXT: entry:
1955 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1956 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1957 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1958 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1959 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1960 ; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1961 ; NVPTX1-NEXT: ret void
1964 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1965 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
1966 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1967 ; NVPTX1-NEXT: entry:
1968 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1969 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1970 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
1971 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1972 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1973 ; NVPTX1: user_code.entry:
1974 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1975 ; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1976 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
1977 ; NVPTX1-NEXT: ret void
1978 ; NVPTX1: worker.exit:
1979 ; NVPTX1-NEXT: ret void
1982 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1983 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12
1984 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1985 ; NVPTX1-NEXT: entry:
1986 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1987 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1988 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1989 ; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
1990 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1991 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1992 ; NVPTX1-NEXT: ret void
1995 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
1996 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13
1997 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1998 ; NVPTX1-NEXT: entry:
1999 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2000 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2001 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
2002 ; NVPTX1-NEXT: ret void
2005 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2006 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
2007 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2008 ; NVPTX1-NEXT: entry:
2009 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2010 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2011 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2012 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2013 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2014 ; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2015 ; NVPTX1-NEXT: ret void
2018 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2019 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14
2020 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2021 ; NVPTX1-NEXT: entry:
2022 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2023 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2024 ; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
2025 ; NVPTX1-NEXT: ret void
2028 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2029 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
2030 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2031 ; NVPTX1-NEXT: entry:
2032 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2033 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2034 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2035 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2036 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2037 ; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2038 ; NVPTX1-NEXT: ret void
2041 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2042 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
2043 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2044 ; NVPTX1-NEXT: entry:
2045 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2046 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2047 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
2048 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2049 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2050 ; NVPTX1: user_code.entry:
2051 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2052 ; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2053 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
2054 ; NVPTX1-NEXT: ret void
2055 ; NVPTX1: worker.exit:
2056 ; NVPTX1-NEXT: ret void
2059 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2060 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15
2061 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2062 ; NVPTX1-NEXT: entry:
2063 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2064 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2065 ; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
2066 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
2067 ; NVPTX1-NEXT: ret void
2070 ; NVPTX1: Function Attrs: noinline nounwind
2071 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
2072 ; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
2073 ; NVPTX1-NEXT: entry:
2074 ; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
2075 ; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
2076 ; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2077 ; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2078 ; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2080 ; NVPTX1-NEXT: br label [[RETURN:%.*]]
2082 ; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2083 ; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2084 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
2085 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
2086 ; NVPTX1-NEXT: br label [[RETURN]]
2088 ; NVPTX1-NEXT: ret void
2091 ; NVPTX1: Function Attrs: convergent noinline nounwind
2092 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
2093 ; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
2094 ; NVPTX1-NEXT: entry:
2095 ; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
2096 ; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
2097 ; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2098 ; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2099 ; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2101 ; NVPTX1-NEXT: br label [[RETURN:%.*]]
2103 ; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2104 ; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2105 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
2106 ; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
2107 ; NVPTX1-NEXT: br label [[RETURN]]
2109 ; NVPTX1-NEXT: ret void
2112 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2113 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
2114 ; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2115 ; NVPTX1-NEXT: entry:
2116 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2117 ; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2118 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
2119 ; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2120 ; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2121 ; NVPTX1: user_code.entry:
2122 ; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2123 ; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2124 ; NVPTX1-NEXT: call void @__kmpc_target_deinit()
2125 ; NVPTX1-NEXT: ret void
2126 ; NVPTX1: worker.exit:
2127 ; NVPTX1-NEXT: ret void
2130 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2131 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16
2132 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2133 ; NVPTX1-NEXT: entry:
2134 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2135 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2136 ; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
2137 ; NVPTX1-NEXT: ret void
2140 ; NVPTX1: Function Attrs: convergent noinline nounwind
2141 ; NVPTX1-LABEL: define {{[^@]+}}@weak_callee_empty
2142 ; NVPTX1-SAME: () #[[ATTR1]] {
2143 ; NVPTX1-NEXT: entry:
2144 ; NVPTX1-NEXT: ret void
2147 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2148 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17
2149 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2150 ; NVPTX1-NEXT: entry:
2151 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2152 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2153 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
2154 ; NVPTX1-NEXT: ret void
2157 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2158 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
2159 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2160 ; NVPTX1-NEXT: entry:
2161 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2162 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2163 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2164 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2165 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2166 ; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2167 ; NVPTX1-NEXT: ret void
2170 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2171 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18
2172 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2173 ; NVPTX1-NEXT: entry:
2174 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2175 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2176 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
2177 ; NVPTX1-NEXT: ret void
2180 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2181 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
2182 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2183 ; NVPTX1-NEXT: entry:
2184 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2185 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2186 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2187 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2188 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2189 ; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2190 ; NVPTX1-NEXT: ret void
2193 ; NVPTX1: Function Attrs: noinline nounwind
2194 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
2195 ; NVPTX1-SAME: () #[[ATTR6]] {
2196 ; NVPTX1-NEXT: entry:
2197 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2198 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2199 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2200 ; NVPTX1-NEXT: ret void
2203 ; NVPTX1: Function Attrs: convergent noinline nounwind
2204 ; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
2205 ; NVPTX1-SAME: () #[[ATTR1]] {
2206 ; NVPTX1-NEXT: entry:
2207 ; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2208 ; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2209 ; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2210 ; NVPTX1-NEXT: ret void
2213 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2214 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19
2215 ; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2216 ; NVPTX1-NEXT: entry:
2217 ; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2218 ; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2219 ; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
2220 ; NVPTX1-NEXT: ret void
2223 ; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
2224 ; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
2225 ; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2226 ; NVPTX1-NEXT: entry:
2227 ; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2228 ; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2229 ; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2230 ; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2231 ; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2232 ; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2233 ; NVPTX1-NEXT: ret void
2236 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2237 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
2238 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
2239 ; AMDGPU2-NEXT: entry:
2240 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2241 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2242 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
2243 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2244 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2245 ; AMDGPU2: user_code.entry:
2246 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
2247 ; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2248 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2249 ; AMDGPU2-NEXT: ret void
2250 ; AMDGPU2: worker.exit:
2251 ; AMDGPU2-NEXT: ret void
2254 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2255 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__
2256 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2257 ; AMDGPU2-NEXT: entry:
2258 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2259 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2260 ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
2261 ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
2262 ; AMDGPU2-NEXT: ret void
2265 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2266 ; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
2267 ; AMDGPU2-SAME: () #[[ATTR1:[0-9]+]] {
2268 ; AMDGPU2-NEXT: entry:
2269 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2270 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2271 ; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2272 ; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2273 ; AMDGPU2: omp_if.then:
2274 ; AMDGPU2-NEXT: store i32 0, ptr @G, align 4
2275 ; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2276 ; AMDGPU2-NEXT: br label [[OMP_IF_END]]
2277 ; AMDGPU2: omp_if.end:
2278 ; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
2279 ; AMDGPU2-NEXT: ret void
2282 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2283 ; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
2284 ; AMDGPU2-SAME: () #[[ATTR1]] {
2285 ; AMDGPU2-NEXT: entry:
2286 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2287 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
2288 ; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2289 ; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2290 ; AMDGPU2: omp_if.then:
2291 ; AMDGPU2-NEXT: store i32 0, ptr @G, align 4
2292 ; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
2293 ; AMDGPU2-NEXT: br label [[OMP_IF_END]]
2294 ; AMDGPU2: omp_if.end:
2295 ; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
2296 ; AMDGPU2-NEXT: ret void
2299 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2300 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
2301 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2302 ; AMDGPU2-NEXT: entry:
2303 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2304 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2305 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
2306 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2307 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2308 ; AMDGPU2: user_code.entry:
2309 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2310 ; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2311 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2312 ; AMDGPU2-NEXT: ret void
2313 ; AMDGPU2: worker.exit:
2314 ; AMDGPU2-NEXT: ret void
2317 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2318 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1
2319 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2320 ; AMDGPU2-NEXT: entry:
2321 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2322 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2323 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2324 ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
2325 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2326 ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2327 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2328 ; AMDGPU2-NEXT: ret void
2331 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2332 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2
2333 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2334 ; AMDGPU2-NEXT: entry:
2335 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2336 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2337 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
2338 ; AMDGPU2-NEXT: ret void
2341 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2342 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
2343 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2344 ; AMDGPU2-NEXT: entry:
2345 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2346 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2347 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2348 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2349 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2350 ; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2351 ; AMDGPU2-NEXT: ret void
2354 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2355 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3
2356 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2357 ; AMDGPU2-NEXT: entry:
2358 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2359 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2360 ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
2361 ; AMDGPU2-NEXT: ret void
2364 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2365 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
2366 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2367 ; AMDGPU2-NEXT: entry:
2368 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2369 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2370 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2371 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2372 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2373 ; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2374 ; AMDGPU2-NEXT: ret void
2377 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2378 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
2379 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2380 ; AMDGPU2-NEXT: entry:
2381 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2382 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2383 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
2384 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2385 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2386 ; AMDGPU2: user_code.entry:
2387 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2388 ; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2389 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2390 ; AMDGPU2-NEXT: ret void
2391 ; AMDGPU2: worker.exit:
2392 ; AMDGPU2-NEXT: ret void
2395 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2396 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4
2397 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2398 ; AMDGPU2-NEXT: entry:
2399 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2400 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2401 ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
2402 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
2403 ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2404 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2405 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
2406 ; AMDGPU2-NEXT: ret void
2409 ; AMDGPU2: Function Attrs: noinline nounwind
2410 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
2411 ; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] {
2412 ; AMDGPU2-NEXT: entry:
2413 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2414 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2415 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2416 ; AMDGPU2-NEXT: ret void
2419 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2420 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
2421 ; AMDGPU2-SAME: () #[[ATTR1]] {
2422 ; AMDGPU2-NEXT: entry:
2423 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2424 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2425 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2426 ; AMDGPU2-NEXT: ret void
2429 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2430 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5
2431 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2432 ; AMDGPU2-NEXT: entry:
2433 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2434 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2435 ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
2436 ; AMDGPU2-NEXT: ret void
2439 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2440 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2441 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2442 ; AMDGPU2-NEXT: entry:
2443 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2444 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2445 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2446 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2447 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2448 ; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2449 ; AMDGPU2-NEXT: ret void
2452 ; AMDGPU2: Function Attrs: noinline nounwind
2453 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
2454 ; AMDGPU2-SAME: () #[[ATTR6]] {
2455 ; AMDGPU2-NEXT: entry:
2456 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2457 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2458 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2459 ; AMDGPU2-NEXT: ret void
2462 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2463 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
2464 ; AMDGPU2-SAME: () #[[ATTR1]] {
2465 ; AMDGPU2-NEXT: entry:
2466 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2467 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2468 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2469 ; AMDGPU2-NEXT: ret void
2472 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2473 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
2474 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2475 ; AMDGPU2-NEXT: entry:
2476 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2477 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2478 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
2479 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2480 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2481 ; AMDGPU2: user_code.entry:
2482 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2483 ; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2484 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2485 ; AMDGPU2-NEXT: ret void
2486 ; AMDGPU2: worker.exit:
2487 ; AMDGPU2-NEXT: ret void
2490 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2491 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6
2492 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2493 ; AMDGPU2-NEXT: entry:
2494 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2495 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2496 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2497 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2498 ; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
2499 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2500 ; AMDGPU2-NEXT: ret void
2503 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2504 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7
2505 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2506 ; AMDGPU2-NEXT: entry:
2507 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2508 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2509 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2510 ; AMDGPU2-NEXT: ret void
2513 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2514 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2515 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2516 ; AMDGPU2-NEXT: entry:
2517 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2518 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2519 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2520 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2521 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2522 ; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2523 ; AMDGPU2-NEXT: ret void
2526 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2527 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8
2528 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2529 ; AMDGPU2-NEXT: entry:
2530 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2531 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2532 ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
2533 ; AMDGPU2-NEXT: ret void
2536 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2537 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
2538 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2539 ; AMDGPU2-NEXT: entry:
2540 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2541 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2542 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2543 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2544 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2545 ; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2546 ; AMDGPU2-NEXT: ret void
2549 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2550 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
2551 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2552 ; AMDGPU2-NEXT: entry:
2553 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2554 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2555 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
2556 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2557 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2558 ; AMDGPU2: user_code.entry:
2559 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2560 ; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2561 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2562 ; AMDGPU2-NEXT: ret void
2563 ; AMDGPU2: worker.exit:
2564 ; AMDGPU2-NEXT: ret void
2567 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2568 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9
2569 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2570 ; AMDGPU2-NEXT: entry:
2571 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2572 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2573 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2574 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2575 ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
2576 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2577 ; AMDGPU2-NEXT: ret void
2580 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2581 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10
2582 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2583 ; AMDGPU2-NEXT: entry:
2584 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2585 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2586 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2587 ; AMDGPU2-NEXT: ret void
2590 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2591 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
2592 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2593 ; AMDGPU2-NEXT: entry:
2594 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2595 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2596 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2597 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2598 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2599 ; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2600 ; AMDGPU2-NEXT: ret void
2603 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2604 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11
2605 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2606 ; AMDGPU2-NEXT: entry:
2607 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2608 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2609 ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
2610 ; AMDGPU2-NEXT: ret void
2613 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2614 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
2615 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2616 ; AMDGPU2-NEXT: entry:
2617 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2618 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2619 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2620 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2621 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2622 ; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2623 ; AMDGPU2-NEXT: ret void
2626 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2627 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
2628 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2629 ; AMDGPU2-NEXT: entry:
2630 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2631 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2632 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
2633 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2634 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2635 ; AMDGPU2: user_code.entry:
2636 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2637 ; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2638 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2639 ; AMDGPU2-NEXT: ret void
2640 ; AMDGPU2: worker.exit:
2641 ; AMDGPU2-NEXT: ret void
2644 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2645 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12
2646 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2647 ; AMDGPU2-NEXT: entry:
2648 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2649 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2650 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2651 ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
2652 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2653 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2654 ; AMDGPU2-NEXT: ret void
2657 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2658 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13
2659 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2660 ; AMDGPU2-NEXT: entry:
2661 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2662 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2663 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2664 ; AMDGPU2-NEXT: ret void
2667 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2668 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
2669 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2670 ; AMDGPU2-NEXT: entry:
2671 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2672 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2673 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2674 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2675 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2676 ; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2677 ; AMDGPU2-NEXT: ret void
2680 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2681 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14
2682 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2683 ; AMDGPU2-NEXT: entry:
2684 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2685 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2686 ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
2687 ; AMDGPU2-NEXT: ret void
2690 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2691 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
2692 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2693 ; AMDGPU2-NEXT: entry:
2694 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2695 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2696 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2697 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2698 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2699 ; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2700 ; AMDGPU2-NEXT: ret void
2703 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2704 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
2705 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2706 ; AMDGPU2-NEXT: entry:
2707 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2708 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2709 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
2710 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2711 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2712 ; AMDGPU2: user_code.entry:
2713 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2714 ; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2715 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2716 ; AMDGPU2-NEXT: ret void
2717 ; AMDGPU2: worker.exit:
2718 ; AMDGPU2-NEXT: ret void
2721 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2722 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15
2723 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2724 ; AMDGPU2-NEXT: entry:
2725 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2726 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2727 ; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
2728 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
2729 ; AMDGPU2-NEXT: ret void
2732 ; AMDGPU2: Function Attrs: noinline nounwind
2733 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
2734 ; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
2735 ; AMDGPU2-NEXT: entry:
2736 ; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
2737 ; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
2738 ; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2739 ; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2740 ; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2742 ; AMDGPU2-NEXT: br label [[RETURN:%.*]]
2744 ; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2745 ; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2746 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
2747 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
2748 ; AMDGPU2-NEXT: br label [[RETURN]]
2750 ; AMDGPU2-NEXT: ret void
2753 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2754 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
2755 ; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
2756 ; AMDGPU2-NEXT: entry:
2757 ; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
2758 ; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
2759 ; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2760 ; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2761 ; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2763 ; AMDGPU2-NEXT: br label [[RETURN:%.*]]
2765 ; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2766 ; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2767 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
2768 ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
2769 ; AMDGPU2-NEXT: br label [[RETURN]]
2771 ; AMDGPU2-NEXT: ret void
2774 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2775 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
2776 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2777 ; AMDGPU2-NEXT: entry:
2778 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2779 ; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2780 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
2781 ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2782 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2783 ; AMDGPU2: user_code.entry:
2784 ; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2785 ; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2786 ; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
2787 ; AMDGPU2-NEXT: ret void
2788 ; AMDGPU2: worker.exit:
2789 ; AMDGPU2-NEXT: ret void
2792 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2793 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16
2794 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2795 ; AMDGPU2-NEXT: entry:
2796 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2797 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2798 ; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
2799 ; AMDGPU2-NEXT: ret void
2802 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2803 ; AMDGPU2-LABEL: define {{[^@]+}}@weak_callee_empty
2804 ; AMDGPU2-SAME: () #[[ATTR1]] {
2805 ; AMDGPU2-NEXT: entry:
2806 ; AMDGPU2-NEXT: ret void
2809 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2810 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17
2811 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2812 ; AMDGPU2-NEXT: entry:
2813 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2814 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2815 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2816 ; AMDGPU2-NEXT: ret void
2819 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2820 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
2821 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2822 ; AMDGPU2-NEXT: entry:
2823 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2824 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2825 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2826 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2827 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2828 ; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2829 ; AMDGPU2-NEXT: ret void
2832 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2833 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18
2834 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2835 ; AMDGPU2-NEXT: entry:
2836 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2837 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2838 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2839 ; AMDGPU2-NEXT: ret void
2842 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2843 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
2844 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2845 ; AMDGPU2-NEXT: entry:
2846 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2847 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2848 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2849 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2850 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2851 ; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2852 ; AMDGPU2-NEXT: ret void
2855 ; AMDGPU2: Function Attrs: noinline nounwind
2856 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
2857 ; AMDGPU2-SAME: () #[[ATTR6]] {
2858 ; AMDGPU2-NEXT: entry:
2859 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2860 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2861 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2862 ; AMDGPU2-NEXT: ret void
2865 ; AMDGPU2: Function Attrs: convergent noinline nounwind
2866 ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
2867 ; AMDGPU2-SAME: () #[[ATTR1]] {
2868 ; AMDGPU2-NEXT: entry:
2869 ; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2870 ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2871 ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2872 ; AMDGPU2-NEXT: ret void
2875 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2876 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19
2877 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2878 ; AMDGPU2-NEXT: entry:
2879 ; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2880 ; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2881 ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
2882 ; AMDGPU2-NEXT: ret void
2885 ; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
2886 ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
2887 ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2888 ; AMDGPU2-NEXT: entry:
2889 ; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
2890 ; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2891 ; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2892 ; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2893 ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2894 ; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2895 ; AMDGPU2-NEXT: ret void
2898 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
2899 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
2900 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
2901 ; AMDGPU3-NEXT: entry:
2902 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2903 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2904 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
2905 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2906 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2907 ; AMDGPU3: user_code.entry:
2908 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
2909 ; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2910 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
2911 ; AMDGPU3-NEXT: ret void
2912 ; AMDGPU3: worker.exit:
2913 ; AMDGPU3-NEXT: ret void
2916 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
2917 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__
2918 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2919 ; AMDGPU3-NEXT: entry:
2920 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2921 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2922 ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
2923 ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
2924 ; AMDGPU3-NEXT: ret void
2927 ; AMDGPU3: Function Attrs: convergent noinline nounwind
2928 ; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
2929 ; AMDGPU3-SAME: () #[[ATTR1:[0-9]+]] {
2930 ; AMDGPU3-NEXT: entry:
2931 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2932 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2933 ; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2934 ; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2935 ; AMDGPU3: omp_if.then:
2936 ; AMDGPU3-NEXT: store i32 0, ptr @G, align 4
2937 ; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2938 ; AMDGPU3-NEXT: br label [[OMP_IF_END]]
2939 ; AMDGPU3: omp_if.end:
2940 ; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
2941 ; AMDGPU3-NEXT: ret void
2944 ; AMDGPU3: Function Attrs: convergent noinline nounwind
2945 ; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
2946 ; AMDGPU3-SAME: () #[[ATTR1]] {
2947 ; AMDGPU3-NEXT: entry:
2948 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2949 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
2950 ; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2951 ; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2952 ; AMDGPU3: omp_if.then:
2953 ; AMDGPU3-NEXT: store i32 0, ptr @G, align 4
2954 ; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
2955 ; AMDGPU3-NEXT: br label [[OMP_IF_END]]
2956 ; AMDGPU3: omp_if.end:
2957 ; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
2958 ; AMDGPU3-NEXT: ret void
2961 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
2962 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
2963 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2964 ; AMDGPU3-NEXT: entry:
2965 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2966 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2967 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
2968 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2969 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2970 ; AMDGPU3: user_code.entry:
2971 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2972 ; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2973 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
2974 ; AMDGPU3-NEXT: ret void
2975 ; AMDGPU3: worker.exit:
2976 ; AMDGPU3-NEXT: ret void
2979 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
2980 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1
2981 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2982 ; AMDGPU3-NEXT: entry:
2983 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2984 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2985 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2986 ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
2987 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2988 ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2989 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2990 ; AMDGPU3-NEXT: ret void
2993 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
2994 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2
2995 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2996 ; AMDGPU3-NEXT: entry:
2997 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2998 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2999 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
3000 ; AMDGPU3-NEXT: ret void
3003 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3004 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
3005 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3006 ; AMDGPU3-NEXT: entry:
3007 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3008 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3009 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3010 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3011 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3012 ; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3013 ; AMDGPU3-NEXT: ret void
3016 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3017 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3
3018 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3019 ; AMDGPU3-NEXT: entry:
3020 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3021 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3022 ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
3023 ; AMDGPU3-NEXT: ret void
3026 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3027 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
3028 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3029 ; AMDGPU3-NEXT: entry:
3030 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3031 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3032 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3033 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3034 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3035 ; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3036 ; AMDGPU3-NEXT: ret void
3039 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3040 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
3041 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3042 ; AMDGPU3-NEXT: entry:
3043 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3044 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3045 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
3046 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3047 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3048 ; AMDGPU3: user_code.entry:
3049 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3050 ; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3051 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3052 ; AMDGPU3-NEXT: ret void
3053 ; AMDGPU3: worker.exit:
3054 ; AMDGPU3-NEXT: ret void
3057 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3058 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4
3059 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3060 ; AMDGPU3-NEXT: entry:
3061 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3062 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3063 ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3064 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
3065 ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
3066 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3067 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
3068 ; AMDGPU3-NEXT: ret void
3071 ; AMDGPU3: Function Attrs: noinline nounwind
3072 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
3073 ; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] {
3074 ; AMDGPU3-NEXT: entry:
3075 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3076 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3077 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3078 ; AMDGPU3-NEXT: ret void
3081 ; AMDGPU3: Function Attrs: convergent noinline nounwind
3082 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
3083 ; AMDGPU3-SAME: () #[[ATTR1]] {
3084 ; AMDGPU3-NEXT: entry:
3085 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3086 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3087 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3088 ; AMDGPU3-NEXT: ret void
3091 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3092 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5
3093 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3094 ; AMDGPU3-NEXT: entry:
3095 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3096 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3097 ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
3098 ; AMDGPU3-NEXT: ret void
3101 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3102 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
3103 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3104 ; AMDGPU3-NEXT: entry:
3105 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3106 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3107 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3108 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3109 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3110 ; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3111 ; AMDGPU3-NEXT: ret void
3114 ; AMDGPU3: Function Attrs: noinline nounwind
3115 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
3116 ; AMDGPU3-SAME: () #[[ATTR6]] {
3117 ; AMDGPU3-NEXT: entry:
3118 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3119 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3120 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3121 ; AMDGPU3-NEXT: ret void
3124 ; AMDGPU3: Function Attrs: convergent noinline nounwind
3125 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
3126 ; AMDGPU3-SAME: () #[[ATTR1]] {
3127 ; AMDGPU3-NEXT: entry:
3128 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3129 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3130 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3131 ; AMDGPU3-NEXT: ret void
3134 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3135 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
3136 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3137 ; AMDGPU3-NEXT: entry:
3138 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3139 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3140 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
3141 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3142 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3143 ; AMDGPU3: user_code.entry:
3144 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3145 ; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3146 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3147 ; AMDGPU3-NEXT: ret void
3148 ; AMDGPU3: worker.exit:
3149 ; AMDGPU3-NEXT: ret void
3152 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3153 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6
3154 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3155 ; AMDGPU3-NEXT: entry:
3156 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3157 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3158 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3159 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3160 ; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
3161 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3162 ; AMDGPU3-NEXT: ret void
3165 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3166 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7
3167 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3168 ; AMDGPU3-NEXT: entry:
3169 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3170 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3171 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3172 ; AMDGPU3-NEXT: ret void
3175 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3176 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3177 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3178 ; AMDGPU3-NEXT: entry:
3179 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3180 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3181 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3182 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3183 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3184 ; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3185 ; AMDGPU3-NEXT: ret void
3188 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3189 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8
3190 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3191 ; AMDGPU3-NEXT: entry:
3192 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3193 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3194 ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
3195 ; AMDGPU3-NEXT: ret void
3198 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3199 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
3200 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3201 ; AMDGPU3-NEXT: entry:
3202 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3203 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3204 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3205 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3206 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3207 ; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3208 ; AMDGPU3-NEXT: ret void
3211 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3212 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
3213 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3214 ; AMDGPU3-NEXT: entry:
3215 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3216 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3217 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
3218 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3219 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3220 ; AMDGPU3: user_code.entry:
3221 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3222 ; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3223 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3224 ; AMDGPU3-NEXT: ret void
3225 ; AMDGPU3: worker.exit:
3226 ; AMDGPU3-NEXT: ret void
3229 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3230 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9
3231 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3232 ; AMDGPU3-NEXT: entry:
3233 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3234 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3235 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3236 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3237 ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3238 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3239 ; AMDGPU3-NEXT: ret void
3242 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3243 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10
3244 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3245 ; AMDGPU3-NEXT: entry:
3246 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3247 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3248 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3249 ; AMDGPU3-NEXT: ret void
3252 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3253 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
3254 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3255 ; AMDGPU3-NEXT: entry:
3256 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3257 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3258 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3259 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3260 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3261 ; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3262 ; AMDGPU3-NEXT: ret void
3265 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3266 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11
3267 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3268 ; AMDGPU3-NEXT: entry:
3269 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3270 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3271 ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
3272 ; AMDGPU3-NEXT: ret void
3275 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3276 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
3277 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3278 ; AMDGPU3-NEXT: entry:
3279 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3280 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3281 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3282 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3283 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3284 ; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3285 ; AMDGPU3-NEXT: ret void
3288 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3289 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
3290 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3291 ; AMDGPU3-NEXT: entry:
3292 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3293 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3294 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
3295 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3296 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3297 ; AMDGPU3: user_code.entry:
3298 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3299 ; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3300 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3301 ; AMDGPU3-NEXT: ret void
3302 ; AMDGPU3: worker.exit:
3303 ; AMDGPU3-NEXT: ret void
3306 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3307 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12
3308 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3309 ; AMDGPU3-NEXT: entry:
3310 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3311 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3312 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3313 ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3314 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3315 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3316 ; AMDGPU3-NEXT: ret void
3319 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3320 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13
3321 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3322 ; AMDGPU3-NEXT: entry:
3323 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3324 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3325 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3326 ; AMDGPU3-NEXT: ret void
3329 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3330 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
3331 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3332 ; AMDGPU3-NEXT: entry:
3333 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3334 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3335 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3336 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3337 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3338 ; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3339 ; AMDGPU3-NEXT: ret void
3342 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3343 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14
3344 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3345 ; AMDGPU3-NEXT: entry:
3346 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3347 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3348 ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
3349 ; AMDGPU3-NEXT: ret void
3352 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3353 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
3354 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3355 ; AMDGPU3-NEXT: entry:
3356 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3357 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3358 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3359 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3360 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3361 ; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3362 ; AMDGPU3-NEXT: ret void
3365 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3366 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
3367 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3368 ; AMDGPU3-NEXT: entry:
3369 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3370 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3371 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
3372 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3373 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3374 ; AMDGPU3: user_code.entry:
3375 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3376 ; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3377 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3378 ; AMDGPU3-NEXT: ret void
3379 ; AMDGPU3: worker.exit:
3380 ; AMDGPU3-NEXT: ret void
3383 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3384 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15
3385 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3386 ; AMDGPU3-NEXT: entry:
3387 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3388 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3389 ; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
3390 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
3391 ; AMDGPU3-NEXT: ret void
3394 ; AMDGPU3: Function Attrs: noinline nounwind
3395 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
3396 ; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
3397 ; AMDGPU3-NEXT: entry:
3398 ; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
3399 ; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
3400 ; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3401 ; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3402 ; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3404 ; AMDGPU3-NEXT: br label [[RETURN:%.*]]
3406 ; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3407 ; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3408 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
3409 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
3410 ; AMDGPU3-NEXT: br label [[RETURN]]
3412 ; AMDGPU3-NEXT: ret void
3415 ; AMDGPU3: Function Attrs: convergent noinline nounwind
3416 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
3417 ; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
3418 ; AMDGPU3-NEXT: entry:
3419 ; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
3420 ; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
3421 ; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3422 ; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3423 ; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3425 ; AMDGPU3-NEXT: br label [[RETURN:%.*]]
3427 ; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3428 ; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3429 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
3430 ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
3431 ; AMDGPU3-NEXT: br label [[RETURN]]
3433 ; AMDGPU3-NEXT: ret void
3436 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3437 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
3438 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3439 ; AMDGPU3-NEXT: entry:
3440 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3441 ; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3442 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
3443 ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3444 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3445 ; AMDGPU3: user_code.entry:
3446 ; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3447 ; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3448 ; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
3449 ; AMDGPU3-NEXT: ret void
3450 ; AMDGPU3: worker.exit:
3451 ; AMDGPU3-NEXT: ret void
3454 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3455 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16
3456 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3457 ; AMDGPU3-NEXT: entry:
3458 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3459 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3460 ; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
3461 ; AMDGPU3-NEXT: ret void
3464 ; AMDGPU3: Function Attrs: convergent noinline nounwind
3465 ; AMDGPU3-LABEL: define {{[^@]+}}@weak_callee_empty
3466 ; AMDGPU3-SAME: () #[[ATTR1]] {
3467 ; AMDGPU3-NEXT: entry:
3468 ; AMDGPU3-NEXT: ret void
3471 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3472 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17
3473 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3474 ; AMDGPU3-NEXT: entry:
3475 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3476 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3477 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3478 ; AMDGPU3-NEXT: ret void
3481 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3482 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
3483 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3484 ; AMDGPU3-NEXT: entry:
3485 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3486 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3487 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3488 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3489 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3490 ; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3491 ; AMDGPU3-NEXT: ret void
3494 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3495 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18
3496 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3497 ; AMDGPU3-NEXT: entry:
3498 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3499 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3500 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3501 ; AMDGPU3-NEXT: ret void
3504 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3505 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
3506 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3507 ; AMDGPU3-NEXT: entry:
3508 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3509 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3510 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3511 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3512 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3513 ; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3514 ; AMDGPU3-NEXT: ret void
3517 ; AMDGPU3: Function Attrs: noinline nounwind
3518 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
3519 ; AMDGPU3-SAME: () #[[ATTR6]] {
3520 ; AMDGPU3-NEXT: entry:
3521 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3522 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3523 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3524 ; AMDGPU3-NEXT: ret void
3527 ; AMDGPU3: Function Attrs: convergent noinline nounwind
3528 ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
3529 ; AMDGPU3-SAME: () #[[ATTR1]] {
3530 ; AMDGPU3-NEXT: entry:
3531 ; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3532 ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3533 ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3534 ; AMDGPU3-NEXT: ret void
3537 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3538 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19
3539 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3540 ; AMDGPU3-NEXT: entry:
3541 ; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3542 ; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3543 ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
3544 ; AMDGPU3-NEXT: ret void
3547 ; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
3548 ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
3549 ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3550 ; AMDGPU3-NEXT: entry:
3551 ; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3552 ; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3553 ; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3554 ; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3555 ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3556 ; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3557 ; AMDGPU3-NEXT: ret void
3560 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3561 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
3562 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
3563 ; NVPTX2-NEXT: entry:
3564 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3565 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3566 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
3567 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3568 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3569 ; NVPTX2: user_code.entry:
3570 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
3571 ; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3572 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3573 ; NVPTX2-NEXT: ret void
3574 ; NVPTX2: worker.exit:
3575 ; NVPTX2-NEXT: ret void
3578 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3579 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__
3580 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3581 ; NVPTX2-NEXT: entry:
3582 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3583 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3584 ; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
3585 ; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
3586 ; NVPTX2-NEXT: ret void
3589 ; NVPTX2: Function Attrs: convergent noinline nounwind
3590 ; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
3591 ; NVPTX2-SAME: () #[[ATTR1:[0-9]+]] {
3592 ; NVPTX2-NEXT: entry:
3593 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3594 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
3595 ; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
3596 ; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
3597 ; NVPTX2: omp_if.then:
3598 ; NVPTX2-NEXT: store i32 0, ptr @G, align 4
3599 ; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
3600 ; NVPTX2-NEXT: br label [[OMP_IF_END]]
3601 ; NVPTX2: omp_if.end:
3602 ; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
3603 ; NVPTX2-NEXT: ret void
3606 ; NVPTX2: Function Attrs: convergent noinline nounwind
3607 ; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
3608 ; NVPTX2-SAME: () #[[ATTR1]] {
3609 ; NVPTX2-NEXT: entry:
3610 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3611 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
3612 ; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
3613 ; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
3614 ; NVPTX2: omp_if.then:
3615 ; NVPTX2-NEXT: store i32 0, ptr @G, align 4
3616 ; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
3617 ; NVPTX2-NEXT: br label [[OMP_IF_END]]
3618 ; NVPTX2: omp_if.end:
3619 ; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
3620 ; NVPTX2-NEXT: ret void
3623 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3624 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
3625 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3626 ; NVPTX2-NEXT: entry:
3627 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3628 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3629 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
3630 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3631 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3632 ; NVPTX2: user_code.entry:
3633 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3634 ; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3635 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3636 ; NVPTX2-NEXT: ret void
3637 ; NVPTX2: worker.exit:
3638 ; NVPTX2-NEXT: ret void
3641 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3642 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1
3643 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3644 ; NVPTX2-NEXT: entry:
3645 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3646 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3647 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3648 ; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3649 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3650 ; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
3651 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3652 ; NVPTX2-NEXT: ret void
3655 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3656 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2
3657 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3658 ; NVPTX2-NEXT: entry:
3659 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3660 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3661 ; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
3662 ; NVPTX2-NEXT: ret void
3665 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3666 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
3667 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3668 ; NVPTX2-NEXT: entry:
3669 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3670 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3671 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3672 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3673 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3674 ; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3675 ; NVPTX2-NEXT: ret void
3678 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3679 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3
3680 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3681 ; NVPTX2-NEXT: entry:
3682 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3683 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3684 ; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
3685 ; NVPTX2-NEXT: ret void
3688 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3689 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
3690 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3691 ; NVPTX2-NEXT: entry:
3692 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3693 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3694 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3695 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3696 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3697 ; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3698 ; NVPTX2-NEXT: ret void
3701 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3702 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
3703 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3704 ; NVPTX2-NEXT: entry:
3705 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3706 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3707 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
3708 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3709 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3710 ; NVPTX2: user_code.entry:
3711 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3712 ; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3713 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3714 ; NVPTX2-NEXT: ret void
3715 ; NVPTX2: worker.exit:
3716 ; NVPTX2-NEXT: ret void
3719 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3720 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4
3721 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3722 ; NVPTX2-NEXT: entry:
3723 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3724 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3725 ; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3726 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
3727 ; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
3728 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3729 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
3730 ; NVPTX2-NEXT: ret void
3733 ; NVPTX2: Function Attrs: noinline nounwind
3734 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
3735 ; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] {
3736 ; NVPTX2-NEXT: entry:
3737 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3738 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3739 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3740 ; NVPTX2-NEXT: ret void
3743 ; NVPTX2: Function Attrs: convergent noinline nounwind
3744 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
3745 ; NVPTX2-SAME: () #[[ATTR1]] {
3746 ; NVPTX2-NEXT: entry:
3747 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3748 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3749 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3750 ; NVPTX2-NEXT: ret void
3753 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3754 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5
3755 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3756 ; NVPTX2-NEXT: entry:
3757 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3758 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3759 ; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
3760 ; NVPTX2-NEXT: ret void
3763 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3764 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
3765 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3766 ; NVPTX2-NEXT: entry:
3767 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3768 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3769 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3770 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3771 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3772 ; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3773 ; NVPTX2-NEXT: ret void
3776 ; NVPTX2: Function Attrs: noinline nounwind
3777 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
3778 ; NVPTX2-SAME: () #[[ATTR6]] {
3779 ; NVPTX2-NEXT: entry:
3780 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3781 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3782 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3783 ; NVPTX2-NEXT: ret void
3786 ; NVPTX2: Function Attrs: convergent noinline nounwind
3787 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
3788 ; NVPTX2-SAME: () #[[ATTR1]] {
3789 ; NVPTX2-NEXT: entry:
3790 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3791 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3792 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3793 ; NVPTX2-NEXT: ret void
3796 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3797 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
3798 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3799 ; NVPTX2-NEXT: entry:
3800 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3801 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3802 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
3803 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3804 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3805 ; NVPTX2: user_code.entry:
3806 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3807 ; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3808 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3809 ; NVPTX2-NEXT: ret void
3810 ; NVPTX2: worker.exit:
3811 ; NVPTX2-NEXT: ret void
3814 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3815 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6
3816 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3817 ; NVPTX2-NEXT: entry:
3818 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3819 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3820 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3821 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3822 ; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
3823 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3824 ; NVPTX2-NEXT: ret void
3827 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3828 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7
3829 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3830 ; NVPTX2-NEXT: entry:
3831 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3832 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3833 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
3834 ; NVPTX2-NEXT: ret void
3837 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3838 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3839 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3840 ; NVPTX2-NEXT: entry:
3841 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3842 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3843 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3844 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3845 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3846 ; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3847 ; NVPTX2-NEXT: ret void
3850 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3851 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8
3852 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3853 ; NVPTX2-NEXT: entry:
3854 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3855 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3856 ; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
3857 ; NVPTX2-NEXT: ret void
3860 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3861 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
3862 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3863 ; NVPTX2-NEXT: entry:
3864 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3865 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3866 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3867 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3868 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3869 ; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3870 ; NVPTX2-NEXT: ret void
3873 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3874 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
3875 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3876 ; NVPTX2-NEXT: entry:
3877 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3878 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3879 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
3880 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3881 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3882 ; NVPTX2: user_code.entry:
3883 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3884 ; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3885 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3886 ; NVPTX2-NEXT: ret void
3887 ; NVPTX2: worker.exit:
3888 ; NVPTX2-NEXT: ret void
3891 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3892 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9
3893 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3894 ; NVPTX2-NEXT: entry:
3895 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3896 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3897 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3898 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3899 ; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3900 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3901 ; NVPTX2-NEXT: ret void
3904 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3905 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10
3906 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3907 ; NVPTX2-NEXT: entry:
3908 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3909 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3910 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
3911 ; NVPTX2-NEXT: ret void
3914 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3915 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
3916 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3917 ; NVPTX2-NEXT: entry:
3918 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3919 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3920 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3921 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3922 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3923 ; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3924 ; NVPTX2-NEXT: ret void
3927 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3928 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11
3929 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3930 ; NVPTX2-NEXT: entry:
3931 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3932 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3933 ; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
3934 ; NVPTX2-NEXT: ret void
3937 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3938 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
3939 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3940 ; NVPTX2-NEXT: entry:
3941 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3942 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3943 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3944 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3945 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3946 ; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3947 ; NVPTX2-NEXT: ret void
3950 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3951 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
3952 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3953 ; NVPTX2-NEXT: entry:
3954 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3955 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3956 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
3957 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3958 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3959 ; NVPTX2: user_code.entry:
3960 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3961 ; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3962 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
3963 ; NVPTX2-NEXT: ret void
3964 ; NVPTX2: worker.exit:
3965 ; NVPTX2-NEXT: ret void
3968 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3969 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12
3970 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3971 ; NVPTX2-NEXT: entry:
3972 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3973 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3974 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3975 ; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
3976 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3977 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3978 ; NVPTX2-NEXT: ret void
3981 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3982 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13
3983 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3984 ; NVPTX2-NEXT: entry:
3985 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3986 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3987 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
3988 ; NVPTX2-NEXT: ret void
3991 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
3992 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
3993 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3994 ; NVPTX2-NEXT: entry:
3995 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
3996 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3997 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3998 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3999 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4000 ; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4001 ; NVPTX2-NEXT: ret void
4004 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4005 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14
4006 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4007 ; NVPTX2-NEXT: entry:
4008 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4009 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4010 ; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
4011 ; NVPTX2-NEXT: ret void
4014 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4015 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
4016 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4017 ; NVPTX2-NEXT: entry:
4018 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4019 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4020 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4021 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4022 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4023 ; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4024 ; NVPTX2-NEXT: ret void
4027 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4028 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
4029 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4030 ; NVPTX2-NEXT: entry:
4031 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4032 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4033 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
4034 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4035 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4036 ; NVPTX2: user_code.entry:
4037 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4038 ; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4039 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
4040 ; NVPTX2-NEXT: ret void
4041 ; NVPTX2: worker.exit:
4042 ; NVPTX2-NEXT: ret void
4045 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4046 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15
4047 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4048 ; NVPTX2-NEXT: entry:
4049 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4050 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4051 ; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
4052 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
4053 ; NVPTX2-NEXT: ret void
4056 ; NVPTX2: Function Attrs: noinline nounwind
4057 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
4058 ; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
4059 ; NVPTX2-NEXT: entry:
4060 ; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
4061 ; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
4062 ; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
4063 ; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
4064 ; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
4066 ; NVPTX2-NEXT: br label [[RETURN:%.*]]
4068 ; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
4069 ; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
4070 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
4071 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
4072 ; NVPTX2-NEXT: br label [[RETURN]]
4074 ; NVPTX2-NEXT: ret void
4077 ; NVPTX2: Function Attrs: convergent noinline nounwind
4078 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
4079 ; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
4080 ; NVPTX2-NEXT: entry:
4081 ; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
4082 ; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
4083 ; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
4084 ; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
4085 ; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
4087 ; NVPTX2-NEXT: br label [[RETURN:%.*]]
4089 ; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
4090 ; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
4091 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
4092 ; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
4093 ; NVPTX2-NEXT: br label [[RETURN]]
4095 ; NVPTX2-NEXT: ret void
4098 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4099 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
4100 ; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4101 ; NVPTX2-NEXT: entry:
4102 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4103 ; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4104 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
4105 ; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4106 ; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4107 ; NVPTX2: user_code.entry:
4108 ; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4109 ; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4110 ; NVPTX2-NEXT: call void @__kmpc_target_deinit()
4111 ; NVPTX2-NEXT: ret void
4112 ; NVPTX2: worker.exit:
4113 ; NVPTX2-NEXT: ret void
4116 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4117 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16
4118 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4119 ; NVPTX2-NEXT: entry:
4120 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4121 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4122 ; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
4123 ; NVPTX2-NEXT: ret void
4126 ; NVPTX2: Function Attrs: convergent noinline nounwind
4127 ; NVPTX2-LABEL: define {{[^@]+}}@weak_callee_empty
4128 ; NVPTX2-SAME: () #[[ATTR1]] {
4129 ; NVPTX2-NEXT: entry:
4130 ; NVPTX2-NEXT: ret void
4133 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4134 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17
4135 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4136 ; NVPTX2-NEXT: entry:
4137 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4138 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4139 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
4140 ; NVPTX2-NEXT: ret void
4143 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4144 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
4145 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4146 ; NVPTX2-NEXT: entry:
4147 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4148 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4149 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4150 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4151 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4152 ; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4153 ; NVPTX2-NEXT: ret void
4156 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4157 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18
4158 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4159 ; NVPTX2-NEXT: entry:
4160 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4161 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4162 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
4163 ; NVPTX2-NEXT: ret void
4166 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4167 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
4168 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4169 ; NVPTX2-NEXT: entry:
4170 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4171 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4172 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4173 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4174 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4175 ; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4176 ; NVPTX2-NEXT: ret void
4179 ; NVPTX2: Function Attrs: noinline nounwind
4180 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
4181 ; NVPTX2-SAME: () #[[ATTR6]] {
4182 ; NVPTX2-NEXT: entry:
4183 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4184 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4185 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4186 ; NVPTX2-NEXT: ret void
4189 ; NVPTX2: Function Attrs: convergent noinline nounwind
4190 ; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
4191 ; NVPTX2-SAME: () #[[ATTR1]] {
4192 ; NVPTX2-NEXT: entry:
4193 ; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4194 ; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4195 ; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4196 ; NVPTX2-NEXT: ret void
4199 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4200 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19
4201 ; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4202 ; NVPTX2-NEXT: entry:
4203 ; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4204 ; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4205 ; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
4206 ; NVPTX2-NEXT: ret void
4209 ; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
4210 ; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
4211 ; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4212 ; NVPTX2-NEXT: entry:
4213 ; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4214 ; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4215 ; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4216 ; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4217 ; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4218 ; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4219 ; NVPTX2-NEXT: ret void
4222 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4223 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
4224 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
4225 ; NVPTX3-NEXT: entry:
4226 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4227 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4228 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
4229 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4230 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4231 ; NVPTX3: user_code.entry:
4232 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
4233 ; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4234 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4235 ; NVPTX3-NEXT: ret void
4236 ; NVPTX3: worker.exit:
4237 ; NVPTX3-NEXT: ret void
4240 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4241 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__
4242 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4243 ; NVPTX3-NEXT: entry:
4244 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4245 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4246 ; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
4247 ; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
4248 ; NVPTX3-NEXT: ret void
4251 ; NVPTX3: Function Attrs: convergent noinline nounwind
4252 ; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
4253 ; NVPTX3-SAME: () #[[ATTR1:[0-9]+]] {
4254 ; NVPTX3-NEXT: entry:
4255 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4256 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
4257 ; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
4258 ; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
4259 ; NVPTX3: omp_if.then:
4260 ; NVPTX3-NEXT: store i32 0, ptr @G, align 4
4261 ; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
4262 ; NVPTX3-NEXT: br label [[OMP_IF_END]]
4263 ; NVPTX3: omp_if.end:
4264 ; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
4265 ; NVPTX3-NEXT: ret void
4268 ; NVPTX3: Function Attrs: convergent noinline nounwind
4269 ; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
4270 ; NVPTX3-SAME: () #[[ATTR1]] {
4271 ; NVPTX3-NEXT: entry:
4272 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4273 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
4274 ; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
4275 ; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
4276 ; NVPTX3: omp_if.then:
4277 ; NVPTX3-NEXT: store i32 0, ptr @G, align 4
4278 ; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
4279 ; NVPTX3-NEXT: br label [[OMP_IF_END]]
4280 ; NVPTX3: omp_if.end:
4281 ; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
4282 ; NVPTX3-NEXT: ret void
4285 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4286 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
4287 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4288 ; NVPTX3-NEXT: entry:
4289 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4290 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4291 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
4292 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4293 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4294 ; NVPTX3: user_code.entry:
4295 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4296 ; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4297 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4298 ; NVPTX3-NEXT: ret void
4299 ; NVPTX3: worker.exit:
4300 ; NVPTX3-NEXT: ret void
4303 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4304 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1
4305 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4306 ; NVPTX3-NEXT: entry:
4307 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4308 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4309 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
4310 ; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
4311 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4312 ; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
4313 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
4314 ; NVPTX3-NEXT: ret void
4317 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4318 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2
4319 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4320 ; NVPTX3-NEXT: entry:
4321 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4322 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4323 ; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
4324 ; NVPTX3-NEXT: ret void
4327 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4328 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
4329 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4330 ; NVPTX3-NEXT: entry:
4331 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4332 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4333 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4334 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4335 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4336 ; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4337 ; NVPTX3-NEXT: ret void
4340 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4341 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3
4342 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4343 ; NVPTX3-NEXT: entry:
4344 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4345 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4346 ; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
4347 ; NVPTX3-NEXT: ret void
4350 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4351 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
4352 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4353 ; NVPTX3-NEXT: entry:
4354 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4355 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4356 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4357 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4358 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4359 ; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4360 ; NVPTX3-NEXT: ret void
4363 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4364 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
4365 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4366 ; NVPTX3-NEXT: entry:
4367 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4368 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4369 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
4370 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4371 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4372 ; NVPTX3: user_code.entry:
4373 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4374 ; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4375 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4376 ; NVPTX3-NEXT: ret void
4377 ; NVPTX3: worker.exit:
4378 ; NVPTX3-NEXT: ret void
4381 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4382 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4
4383 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4384 ; NVPTX3-NEXT: entry:
4385 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4386 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4387 ; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
4388 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
4389 ; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
4390 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4391 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
4392 ; NVPTX3-NEXT: ret void
4395 ; NVPTX3: Function Attrs: noinline nounwind
4396 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
4397 ; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] {
4398 ; NVPTX3-NEXT: entry:
4399 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4400 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4401 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4402 ; NVPTX3-NEXT: ret void
4405 ; NVPTX3: Function Attrs: convergent noinline nounwind
4406 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
4407 ; NVPTX3-SAME: () #[[ATTR1]] {
4408 ; NVPTX3-NEXT: entry:
4409 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4410 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4411 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4412 ; NVPTX3-NEXT: ret void
4415 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4416 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5
4417 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4418 ; NVPTX3-NEXT: entry:
4419 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4420 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4421 ; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
4422 ; NVPTX3-NEXT: ret void
4425 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4426 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
4427 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4428 ; NVPTX3-NEXT: entry:
4429 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4430 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4431 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4432 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4433 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4434 ; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4435 ; NVPTX3-NEXT: ret void
4438 ; NVPTX3: Function Attrs: noinline nounwind
4439 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
4440 ; NVPTX3-SAME: () #[[ATTR6]] {
4441 ; NVPTX3-NEXT: entry:
4442 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4443 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4444 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4445 ; NVPTX3-NEXT: ret void
4448 ; NVPTX3: Function Attrs: convergent noinline nounwind
4449 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
4450 ; NVPTX3-SAME: () #[[ATTR1]] {
4451 ; NVPTX3-NEXT: entry:
4452 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4453 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4454 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4455 ; NVPTX3-NEXT: ret void
4458 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4459 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
4460 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4461 ; NVPTX3-NEXT: entry:
4462 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4463 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4464 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
4465 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4466 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4467 ; NVPTX3: user_code.entry:
4468 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4469 ; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4470 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4471 ; NVPTX3-NEXT: ret void
4472 ; NVPTX3: worker.exit:
4473 ; NVPTX3-NEXT: ret void
4476 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4477 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6
4478 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4479 ; NVPTX3-NEXT: entry:
4480 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4481 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4482 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
4483 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4484 ; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
4485 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
4486 ; NVPTX3-NEXT: ret void
4489 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4490 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7
4491 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4492 ; NVPTX3-NEXT: entry:
4493 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4494 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4495 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4496 ; NVPTX3-NEXT: ret void
4499 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4500 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
4501 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4502 ; NVPTX3-NEXT: entry:
4503 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4504 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4505 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4506 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4507 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4508 ; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4509 ; NVPTX3-NEXT: ret void
4512 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4513 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8
4514 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4515 ; NVPTX3-NEXT: entry:
4516 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4517 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4518 ; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
4519 ; NVPTX3-NEXT: ret void
4522 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4523 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
4524 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4525 ; NVPTX3-NEXT: entry:
4526 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4527 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4528 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4529 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4530 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4531 ; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4532 ; NVPTX3-NEXT: ret void
4535 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4536 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
4537 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4538 ; NVPTX3-NEXT: entry:
4539 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4540 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4541 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
4542 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4543 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4544 ; NVPTX3: user_code.entry:
4545 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4546 ; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4547 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4548 ; NVPTX3-NEXT: ret void
4549 ; NVPTX3: worker.exit:
4550 ; NVPTX3-NEXT: ret void
4553 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4554 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9
4555 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4556 ; NVPTX3-NEXT: entry:
4557 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4558 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4559 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
4560 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4561 ; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
4562 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
4563 ; NVPTX3-NEXT: ret void
4566 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4567 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10
4568 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4569 ; NVPTX3-NEXT: entry:
4570 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4571 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4572 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4573 ; NVPTX3-NEXT: ret void
4576 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4577 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
4578 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4579 ; NVPTX3-NEXT: entry:
4580 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4581 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4582 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4583 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4584 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4585 ; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4586 ; NVPTX3-NEXT: ret void
4589 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4590 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11
4591 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4592 ; NVPTX3-NEXT: entry:
4593 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4594 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4595 ; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
4596 ; NVPTX3-NEXT: ret void
4599 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4600 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
4601 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4602 ; NVPTX3-NEXT: entry:
4603 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4604 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4605 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4606 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4607 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4608 ; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4609 ; NVPTX3-NEXT: ret void
4612 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4613 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
4614 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4615 ; NVPTX3-NEXT: entry:
4616 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4617 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4618 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
4619 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4620 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4621 ; NVPTX3: user_code.entry:
4622 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4623 ; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4624 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4625 ; NVPTX3-NEXT: ret void
4626 ; NVPTX3: worker.exit:
4627 ; NVPTX3-NEXT: ret void
4630 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4631 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12
4632 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4633 ; NVPTX3-NEXT: entry:
4634 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4635 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4636 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
4637 ; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
4638 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4639 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
4640 ; NVPTX3-NEXT: ret void
4643 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4644 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13
4645 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4646 ; NVPTX3-NEXT: entry:
4647 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4648 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4649 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4650 ; NVPTX3-NEXT: ret void
4653 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4654 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
4655 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4656 ; NVPTX3-NEXT: entry:
4657 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4658 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4659 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4660 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4661 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4662 ; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4663 ; NVPTX3-NEXT: ret void
4666 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4667 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14
4668 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4669 ; NVPTX3-NEXT: entry:
4670 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4671 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4672 ; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
4673 ; NVPTX3-NEXT: ret void
4676 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4677 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
4678 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4679 ; NVPTX3-NEXT: entry:
4680 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4681 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4682 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4683 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4684 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4685 ; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4686 ; NVPTX3-NEXT: ret void
4689 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4690 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
4691 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4692 ; NVPTX3-NEXT: entry:
4693 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4694 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4695 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
4696 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4697 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4698 ; NVPTX3: user_code.entry:
4699 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4700 ; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4701 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4702 ; NVPTX3-NEXT: ret void
4703 ; NVPTX3: worker.exit:
4704 ; NVPTX3-NEXT: ret void
4707 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4708 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15
4709 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4710 ; NVPTX3-NEXT: entry:
4711 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4712 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4713 ; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
4714 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
4715 ; NVPTX3-NEXT: ret void
4718 ; NVPTX3: Function Attrs: noinline nounwind
4719 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
4720 ; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
4721 ; NVPTX3-NEXT: entry:
4722 ; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
4723 ; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
4724 ; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
4725 ; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
4726 ; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
4728 ; NVPTX3-NEXT: br label [[RETURN:%.*]]
4730 ; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
4731 ; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
4732 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
4733 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
4734 ; NVPTX3-NEXT: br label [[RETURN]]
4736 ; NVPTX3-NEXT: ret void
4739 ; NVPTX3: Function Attrs: convergent noinline nounwind
4740 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
4741 ; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
4742 ; NVPTX3-NEXT: entry:
4743 ; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
4744 ; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
4745 ; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
4746 ; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
4747 ; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
4749 ; NVPTX3-NEXT: br label [[RETURN:%.*]]
4751 ; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
4752 ; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
4753 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
4754 ; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
4755 ; NVPTX3-NEXT: br label [[RETURN]]
4757 ; NVPTX3-NEXT: ret void
4760 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4761 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
4762 ; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
4763 ; NVPTX3-NEXT: entry:
4764 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4765 ; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
4766 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
4767 ; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4768 ; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4769 ; NVPTX3: user_code.entry:
4770 ; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
4771 ; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4772 ; NVPTX3-NEXT: call void @__kmpc_target_deinit()
4773 ; NVPTX3-NEXT: ret void
4774 ; NVPTX3: worker.exit:
4775 ; NVPTX3-NEXT: ret void
4778 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4779 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16
4780 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4781 ; NVPTX3-NEXT: entry:
4782 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4783 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4784 ; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
4785 ; NVPTX3-NEXT: ret void
4788 ; NVPTX3: Function Attrs: convergent noinline nounwind
4789 ; NVPTX3-LABEL: define {{[^@]+}}@weak_callee_empty
4790 ; NVPTX3-SAME: () #[[ATTR1]] {
4791 ; NVPTX3-NEXT: entry:
4792 ; NVPTX3-NEXT: ret void
4795 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4796 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17
4797 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4798 ; NVPTX3-NEXT: entry:
4799 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4800 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4801 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4802 ; NVPTX3-NEXT: ret void
4805 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4806 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
4807 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4808 ; NVPTX3-NEXT: entry:
4809 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4810 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4811 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4812 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4813 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4814 ; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4815 ; NVPTX3-NEXT: ret void
4818 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4819 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18
4820 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4821 ; NVPTX3-NEXT: entry:
4822 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4823 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4824 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4825 ; NVPTX3-NEXT: ret void
4828 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4829 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
4830 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4831 ; NVPTX3-NEXT: entry:
4832 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4833 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4834 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4835 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4836 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4837 ; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4838 ; NVPTX3-NEXT: ret void
4841 ; NVPTX3: Function Attrs: noinline nounwind
4842 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
4843 ; NVPTX3-SAME: () #[[ATTR6]] {
4844 ; NVPTX3-NEXT: entry:
4845 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4846 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4847 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4848 ; NVPTX3-NEXT: ret void
4851 ; NVPTX3: Function Attrs: convergent noinline nounwind
4852 ; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
4853 ; NVPTX3-SAME: () #[[ATTR1]] {
4854 ; NVPTX3-NEXT: entry:
4855 ; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4856 ; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4857 ; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4858 ; NVPTX3-NEXT: ret void
4861 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4862 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19
4863 ; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4864 ; NVPTX3-NEXT: entry:
4865 ; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4866 ; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4867 ; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
4868 ; NVPTX3-NEXT: ret void
4871 ; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
4872 ; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
4873 ; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4874 ; NVPTX3-NEXT: entry:
4875 ; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
4876 ; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4877 ; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4878 ; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4879 ; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4880 ; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4881 ; NVPTX3-NEXT: ret void
4884 ; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4885 ; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4886 ; AMDGPU1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4887 ; AMDGPU1: attributes #[[ATTR3]] = { nounwind }
4888 ; AMDGPU1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4889 ; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4890 ; AMDGPU1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4891 ; AMDGPU1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4892 ; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4893 ; AMDGPU1: attributes #[[ATTR9]] = { convergent nounwind }
4894 ; AMDGPU1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4895 ; AMDGPU1: attributes #[[ATTR11]] = { convergent }
4897 ; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4898 ; NVPTX1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4899 ; NVPTX1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4900 ; NVPTX1: attributes #[[ATTR3]] = { nounwind }
4901 ; NVPTX1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4902 ; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4903 ; NVPTX1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4904 ; NVPTX1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4905 ; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4906 ; NVPTX1: attributes #[[ATTR9]] = { convergent nounwind }
4907 ; NVPTX1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4908 ; NVPTX1: attributes #[[ATTR11]] = { convergent }
4910 ; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4911 ; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4912 ; AMDGPU2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4913 ; AMDGPU2: attributes #[[ATTR3]] = { nounwind }
4914 ; AMDGPU2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4915 ; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4916 ; AMDGPU2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4917 ; AMDGPU2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4918 ; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4919 ; AMDGPU2: attributes #[[ATTR9]] = { convergent nounwind }
4920 ; AMDGPU2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4921 ; AMDGPU2: attributes #[[ATTR11]] = { convergent }
4923 ; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4924 ; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4925 ; AMDGPU3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4926 ; AMDGPU3: attributes #[[ATTR3]] = { nounwind }
4927 ; AMDGPU3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4928 ; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4929 ; AMDGPU3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4930 ; AMDGPU3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4931 ; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4932 ; AMDGPU3: attributes #[[ATTR9]] = { convergent nounwind }
4933 ; AMDGPU3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4934 ; AMDGPU3: attributes #[[ATTR11]] = { convergent }
4936 ; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4937 ; NVPTX2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4938 ; NVPTX2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4939 ; NVPTX2: attributes #[[ATTR3]] = { nounwind }
4940 ; NVPTX2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4941 ; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4942 ; NVPTX2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4943 ; NVPTX2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4944 ; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4945 ; NVPTX2: attributes #[[ATTR9]] = { convergent nounwind }
4946 ; NVPTX2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4947 ; NVPTX2: attributes #[[ATTR11]] = { convergent }
4949 ; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4950 ; NVPTX3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4951 ; NVPTX3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4952 ; NVPTX3: attributes #[[ATTR3]] = { nounwind }
4953 ; NVPTX3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4954 ; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4955 ; NVPTX3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4956 ; NVPTX3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4957 ; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4958 ; NVPTX3: attributes #[[ATTR9]] = { convergent nounwind }
4959 ; NVPTX3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4960 ; NVPTX3: attributes #[[ATTR11]] = { convergent }
4962 ; AMDGPU1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4963 ; AMDGPU1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4964 ; AMDGPU1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4965 ; AMDGPU1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4966 ; AMDGPU1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4967 ; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4968 ; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4969 ; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4970 ; AMDGPU1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4971 ; AMDGPU1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4972 ; AMDGPU1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4974 ; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4975 ; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4976 ; NVPTX1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4977 ; NVPTX1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4978 ; NVPTX1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4979 ; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4980 ; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4981 ; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4982 ; NVPTX1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4983 ; NVPTX1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4984 ; NVPTX1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4986 ; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4987 ; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4988 ; AMDGPU2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4989 ; AMDGPU2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4990 ; AMDGPU2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4991 ; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4992 ; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4993 ; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4994 ; AMDGPU2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4995 ; AMDGPU2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4996 ; AMDGPU2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4998 ; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4999 ; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
5000 ; AMDGPU3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
5001 ; AMDGPU3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
5002 ; AMDGPU3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
5003 ; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
5004 ; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
5005 ; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
5006 ; AMDGPU3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
5007 ; AMDGPU3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
5008 ; AMDGPU3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
5010 ; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
5011 ; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
5012 ; NVPTX2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
5013 ; NVPTX2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
5014 ; NVPTX2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
5015 ; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
5016 ; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
5017 ; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
5018 ; NVPTX2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
5019 ; NVPTX2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
5020 ; NVPTX2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
5022 ; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
5023 ; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
5024 ; NVPTX3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
5025 ; NVPTX3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
5026 ; NVPTX3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
5027 ; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
5028 ; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
5029 ; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
5030 ; NVPTX3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
5031 ; NVPTX3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
5032 ; NVPTX3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}