; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2
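;
; The RUN lines above exercise three configurations per target: the plain AMDGPU/NVPTX
; prefixes check -passes=openmp-opt with SPMDization enabled, the *-DISABLED1 prefixes
; check the same pass with -openmp-opt-disable-spmdization, and the *-DISABLED2 prefixes
; check the -passes=openmp-opt-postlink pipeline.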
;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));
;; void sequential_loop() {
;; #pragma omp target teams
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; void use(__attribute__((noescape)) int *) __attribute__((assume("ompx_spmd_amenable")));
;; void sequential_loop_to_stack_var() {
;; #pragma omp target teams
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; void sequential_loop_to_shared_var() {
;; #pragma omp target teams
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; void sequential_loop_to_shared_var_guarded() {
;; #pragma omp target teams
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; void do_not_spmdize_target() {
;; #pragma omp target teams
;; // Incompatible parallel level, called both
;; // from parallel and target regions
;; void do_not_spmdize_task() {
;; #pragma omp parallel
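;;
;; Only fragments of the original C sources are kept above. As an illustrative sketch
;; (not the verbatim source of the elided lines), the SPMD-amenable kernels all follow
;; this general shape:
;;
;;   #pragma omp target teams
;;   {
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       { /* per-iteration work */ }
;;     }
;;     spmd_amenable();
;;   }
;;
;; The checks below expect openmp-opt to SPMDize these kernels in the AMDGPU/NVPTX runs:
;; the execution-mode byte of their ConfigurationEnvironmentTy initializer goes from 1 to
;; 3, no generic-mode worker state machine is emitted, and the shared x variables end up
;; in addrspace(3) globals (the x_shared globals). The do_not_spmdize_* kernels keep
;; execution mode 1, and the *-DISABLED1/*-DISABLED2 runs still contain the worker state
;; machine.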
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
%struct.anon = type {}
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
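; A rough field map for %struct.ConfigurationEnvironmentTy, assuming the layout used by
; OpenMPIRBuilder: UseGenericStateMachine, MayUseNestedParallelism, ExecMode, followed by
; MinThreads, MaxThreads, MinTeams, MaxTeams. The initializers below therefore describe
; generic-mode kernels (third byte == 1) that start out using the generic state machine.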
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
139 ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
140 ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
141 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
142 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
143 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
144 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
145 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
146 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
147 ; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
148 ; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
149 ; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
150 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
152 ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
153 ; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
154 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
155 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
156 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
157 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
158 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
159 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
160 ; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
161 ; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
162 ; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
163 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
165 ; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
166 ; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
167 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
168 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
169 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
170 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
171 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
172 ; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
173 ; AMDGPU-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
174 ; AMDGPU-DISABLED1: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
175 ; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
176 ; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
177 ; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
178 ; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
179 ; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
181 ; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
182 ; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
183 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
184 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
185 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
186 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
187 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
188 ; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
189 ; AMDGPU-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
190 ; AMDGPU-DISABLED2: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
191 ; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
192 ; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
193 ; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
194 ; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
195 ; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
197 ; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
198 ; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
199 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
200 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
201 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
202 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
203 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
204 ; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
205 ; NVPTX-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
206 ; NVPTX-DISABLED1: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
207 ; NVPTX-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
208 ; NVPTX-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
209 ; NVPTX-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
210 ; NVPTX-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
211 ; NVPTX-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
213 ; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
214 ; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
215 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
216 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
217 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
218 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
219 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
220 ; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
221 ; NVPTX-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
222 ; NVPTX-DISABLED2: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
223 ; NVPTX-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
224 ; NVPTX-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
225 ; NVPTX-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
226 ; NVPTX-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
227 ; NVPTX-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
230 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
231 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
232 ; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
233 ; AMDGPU-NEXT: ret void
235 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
236 ; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
237 ; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
238 ; NVPTX-NEXT: ret void
240 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
241 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
242 ; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
243 ; AMDGPU-DISABLED1-NEXT: ret void
245 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
246 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
247 ; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
248 ; AMDGPU-DISABLED2-NEXT: ret void
250 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
251 ; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
252 ; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
253 ; NVPTX-DISABLED1-NEXT: ret void
255 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
256 ; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
257 ; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
258 ; NVPTX-DISABLED2-NEXT: ret void
  call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
  ret void
}

define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() {
273 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
274 ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
275 ; AMDGPU-NEXT: entry:
276 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
277 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
278 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
279 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
280 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
281 ; AMDGPU: common.ret:
282 ; AMDGPU-NEXT: ret void
283 ; AMDGPU: user_code.entry:
284 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
285 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
286 ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
287 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
288 ; AMDGPU-NEXT: br label [[COMMON_RET]]
290 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
291 ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
293 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
294 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
295 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
296 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
297 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
299 ; NVPTX-NEXT: ret void
300 ; NVPTX: user_code.entry:
301 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
302 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
303 ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
304 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
305 ; NVPTX-NEXT: br label [[COMMON_RET]]
307 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
308 ; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
309 ; AMDGPU-DISABLED1-NEXT: entry:
310 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
311 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
312 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
313 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
314 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
315 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
316 ; AMDGPU-DISABLED1: is_worker_check:
317 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
318 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
319 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
320 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
321 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
322 ; AMDGPU-DISABLED1: worker_state_machine.begin:
323 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
324 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
325 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
326 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
327 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
328 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
329 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
330 ; AMDGPU-DISABLED1: worker_state_machine.finished:
331 ; AMDGPU-DISABLED1-NEXT: ret void
332 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
333 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
334 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
335 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
336 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
337 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
338 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
339 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
340 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
341 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
342 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
343 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
344 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
345 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
346 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
347 ; AMDGPU-DISABLED1: thread.user_code.check:
348 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
349 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
350 ; AMDGPU-DISABLED1: common.ret:
351 ; AMDGPU-DISABLED1-NEXT: ret void
352 ; AMDGPU-DISABLED1: user_code.entry:
353 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
354 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
355 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
356 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
357 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
359 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
360 ; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
361 ; AMDGPU-DISABLED2-NEXT: entry:
362 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
363 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
364 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
365 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
366 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
367 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
368 ; AMDGPU-DISABLED2: is_worker_check:
369 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
370 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
371 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
372 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
373 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
374 ; AMDGPU-DISABLED2: worker_state_machine.begin:
375 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
376 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
377 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
378 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
379 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
380 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
381 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
382 ; AMDGPU-DISABLED2: worker_state_machine.finished:
383 ; AMDGPU-DISABLED2-NEXT: ret void
384 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
385 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
386 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
387 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
388 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
389 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
390 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
391 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
392 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
393 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
394 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
395 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
396 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
397 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
398 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
399 ; AMDGPU-DISABLED2: thread.user_code.check:
400 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
401 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
402 ; AMDGPU-DISABLED2: common.ret:
403 ; AMDGPU-DISABLED2-NEXT: ret void
404 ; AMDGPU-DISABLED2: user_code.entry:
405 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
406 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
407 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
408 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
409 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
411 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
412 ; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
413 ; NVPTX-DISABLED1-NEXT: entry:
414 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
415 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
416 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
417 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
418 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
419 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
420 ; NVPTX-DISABLED1: is_worker_check:
421 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
422 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
423 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
424 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
425 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
426 ; NVPTX-DISABLED1: worker_state_machine.begin:
427 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
428 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
429 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
430 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
431 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
432 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
433 ; NVPTX-DISABLED1: worker_state_machine.finished:
434 ; NVPTX-DISABLED1-NEXT: ret void
435 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
436 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
437 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
438 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
439 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
440 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
441 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
442 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
443 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
444 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
445 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
446 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
447 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
448 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
449 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
450 ; NVPTX-DISABLED1: thread.user_code.check:
451 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
452 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
453 ; NVPTX-DISABLED1: common.ret:
454 ; NVPTX-DISABLED1-NEXT: ret void
455 ; NVPTX-DISABLED1: user_code.entry:
456 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
457 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
458 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
459 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
460 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
462 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
463 ; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
464 ; NVPTX-DISABLED2-NEXT: entry:
465 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
466 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
467 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
468 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
469 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
470 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
471 ; NVPTX-DISABLED2: is_worker_check:
472 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
473 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
474 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
475 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
476 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
477 ; NVPTX-DISABLED2: worker_state_machine.begin:
478 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
479 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
480 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
481 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
482 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
483 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
484 ; NVPTX-DISABLED2: worker_state_machine.finished:
485 ; NVPTX-DISABLED2-NEXT: ret void
486 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
487 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
488 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
489 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
490 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
491 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
492 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
493 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
494 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
495 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
496 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
497 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
498 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
499 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
500 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
501 ; NVPTX-DISABLED2: thread.user_code.check:
502 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
503 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
504 ; NVPTX-DISABLED2: common.ret:
505 ; NVPTX-DISABLED2-NEXT: ret void
506 ; NVPTX-DISABLED2: user_code.entry:
507 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
508 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
509 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
510 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
511 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
619 %.zero.addr = alloca i32, align 4
620 %.threadid_temp. = alloca i32, align 4
621 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
622 %exec_user_code = icmp eq i32 %0, -1
623 br i1 %exec_user_code, label %user_code.entry, label %common.ret
625 common.ret: ; preds = %entry, %user_code.entry
ret void
628 user_code.entry: ; preds = %entry
629 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
630 store i32 0, ptr %.zero.addr, align 4
631 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
632 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6
633 call void @__kmpc_target_deinit()
br label %common.ret
}
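;; @__omp_outlined__ below is the outlined target-region body for the
;; sequential_loop kernel: a sequential 0..99 loop that launches the parallel
;; region via __kmpc_parallel_51 each iteration and calls spmd_amenable()
;; afterwards. The SPMD runs expect the real @__omp_outlined__1_wrapper pointer
;; in that call, while the -DISABLED runs keep the
;; @__omp_outlined__1_wrapper.ID placeholder used by the generic-mode worker
;; state machine above.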
637 ; Function Attrs: alwaysinline convergent norecurse nounwind
638 define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
639 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
640 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
641 ; AMDGPU-NEXT: entry:
642 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
643 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
645 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
646 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
647 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
648 ; AMDGPU: for.cond.cleanup:
649 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
650 ; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
652 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
653 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
654 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
655 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
657 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
658 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
660 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
661 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
663 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
664 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
665 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
666 ; NVPTX: for.cond.cleanup:
667 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
668 ; NVPTX-NEXT: ret void
; NVPTX: for.body:
670 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
671 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
672 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
673 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
675 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
676 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
677 ; AMDGPU-DISABLED1-NEXT: entry:
678 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
679 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
680 ; AMDGPU-DISABLED1: for.cond:
681 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
682 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
683 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
684 ; AMDGPU-DISABLED1: for.cond.cleanup:
685 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
686 ; AMDGPU-DISABLED1-NEXT: ret void
687 ; AMDGPU-DISABLED1: for.body:
688 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
689 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
690 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
691 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
693 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
694 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
695 ; AMDGPU-DISABLED2-NEXT: entry:
696 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
697 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
698 ; AMDGPU-DISABLED2: for.cond:
699 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
700 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
701 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
702 ; AMDGPU-DISABLED2: for.cond.cleanup:
703 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
704 ; AMDGPU-DISABLED2-NEXT: ret void
705 ; AMDGPU-DISABLED2: for.body:
706 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
707 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
708 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
709 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
711 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
712 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
713 ; NVPTX-DISABLED1-NEXT: entry:
714 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
715 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
716 ; NVPTX-DISABLED1: for.cond:
717 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
718 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
719 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
720 ; NVPTX-DISABLED1: for.cond.cleanup:
721 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
722 ; NVPTX-DISABLED1-NEXT: ret void
723 ; NVPTX-DISABLED1: for.body:
724 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
725 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
726 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
727 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
729 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
730 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
731 ; NVPTX-DISABLED2-NEXT: entry:
732 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
733 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
734 ; NVPTX-DISABLED2: for.cond:
735 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
736 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
737 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
738 ; NVPTX-DISABLED2: for.cond.cleanup:
739 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
740 ; NVPTX-DISABLED2-NEXT: ret void
741 ; NVPTX-DISABLED2: for.body:
742 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
743 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
744 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
745 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
747 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
748 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
749 ; AMDGPU-DISABLED-NEXT: entry:
750 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
751 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
752 ; AMDGPU-DISABLED: for.cond:
753 ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
754 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
755 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
756 ; AMDGPU-DISABLED: for.cond.cleanup:
757 ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
758 ; AMDGPU-DISABLED-NEXT: ret void
759 ; AMDGPU-DISABLED: for.body:
760 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
761 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
762 ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
763 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
764 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
765 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
766 ; NVPTX-DISABLED-NEXT: entry:
767 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
768 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
769 ; NVPTX-DISABLED: for.cond:
770 ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
771 ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
772 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
773 ; NVPTX-DISABLED: for.cond.cleanup:
774 ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
775 ; NVPTX-DISABLED-NEXT: ret void
776 ; NVPTX-DISABLED: for.body:
777 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
778 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
779 ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
780 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
entry:
782 %captured_vars_addrs = alloca [0 x ptr], align 8
br label %for.cond
785 for.cond: ; preds = %for.body, %entry
786 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
787 %cmp = icmp slt i32 %i.0, 100
788 br i1 %cmp, label %for.body, label %for.cond.cleanup
790 for.cond.cleanup: ; preds = %for.cond
791 call void @spmd_amenable() #10
ret void
794 for.body: ; preds = %for.cond
795 %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
796 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
797 %inc = add nsw i32 %i.0, 1
798 br label %for.cond, !llvm.loop !22
}
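;; @__omp_outlined__1 is the parallel-region body launched above; it only calls
;; @unknown(), and the check lines for it are identical for every prefix.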
801 ; Function Attrs: alwaysinline convergent norecurse nounwind
802 define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
803 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
804 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
805 ; AMDGPU-NEXT: entry:
806 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
807 ; AMDGPU-NEXT: ret void
809 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
810 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
812 ; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
813 ; NVPTX-NEXT: ret void
815 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
816 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
817 ; AMDGPU-DISABLED1-NEXT: entry:
818 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
819 ; AMDGPU-DISABLED1-NEXT: ret void
821 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
822 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
823 ; AMDGPU-DISABLED2-NEXT: entry:
824 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
825 ; AMDGPU-DISABLED2-NEXT: ret void
827 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
828 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
829 ; NVPTX-DISABLED1-NEXT: entry:
830 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
831 ; NVPTX-DISABLED1-NEXT: ret void
833 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
834 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
835 ; NVPTX-DISABLED2-NEXT: entry:
836 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
837 ; NVPTX-DISABLED2-NEXT: ret void
839 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
840 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
841 ; AMDGPU-DISABLED-NEXT: entry:
842 ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
843 ; AMDGPU-DISABLED-NEXT: ret void
844 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
845 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
846 ; NVPTX-DISABLED-NEXT: entry:
847 ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
848 ; NVPTX-DISABLED-NEXT: ret void
entry:
850 call void @unknown() #11
ret void
}
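;; @__omp_outlined__1_wrapper reads the shared-variable pointer with
;; __kmpc_get_shared_variables and forwards to @__omp_outlined__1; in the
;; generic-mode (-DISABLED) kernels it is the routine the worker state machine
;; dispatches to.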
854 ; Function Attrs: convergent norecurse nounwind
855 define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
856 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
857 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
858 ; AMDGPU-NEXT: entry:
859 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
860 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
861 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
862 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
863 ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
864 ; AMDGPU-NEXT: ret void
866 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
867 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
869 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
870 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
871 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
872 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
873 ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
874 ; NVPTX-NEXT: ret void
876 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
877 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
878 ; AMDGPU-DISABLED1-NEXT: entry:
879 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
880 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
881 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
882 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
883 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
884 ; AMDGPU-DISABLED1-NEXT: ret void
886 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
887 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
888 ; AMDGPU-DISABLED2-NEXT: entry:
889 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
890 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
891 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
892 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
893 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
894 ; AMDGPU-DISABLED2-NEXT: ret void
896 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
897 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
898 ; NVPTX-DISABLED1-NEXT: entry:
899 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
900 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
901 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
902 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
903 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
904 ; NVPTX-DISABLED1-NEXT: ret void
906 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
907 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
908 ; NVPTX-DISABLED2-NEXT: entry:
909 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
910 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
911 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
912 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
913 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
914 ; NVPTX-DISABLED2-NEXT: ret void
916 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
917 ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
918 ; AMDGPU-DISABLED-NEXT: entry:
919 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
920 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
921 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
922 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
923 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
924 ; AMDGPU-DISABLED-NEXT: ret void
925 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
926 ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
927 ; NVPTX-DISABLED-NEXT: entry:
928 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
929 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
930 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
931 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
932 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
933 ; NVPTX-DISABLED-NEXT: ret void
entry:
935 %.addr1 = alloca i32, align 4
936 %.zero.addr = alloca i32, align 4
937 %global_args = alloca ptr, align 8
938 store i32 %1, ptr %.addr1, align 4, !tbaa !18
939 store i32 0, ptr %.zero.addr, align 4
940 call void @__kmpc_get_shared_variables(ptr %global_args)
941 call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6
ret void
}
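;; Second kernel: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20.
;; The SPMD runs expect plain initialization plus a direct call to
;; @__omp_outlined__2; the -DISABLED runs additionally expect the worker state
;; machine built around @__omp_outlined__3_wrapper.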
945 ; Function Attrs: alwaysinline convergent norecurse nounwind
946 define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
947 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
948 ; AMDGPU-SAME: () #[[ATTR0]] {
949 ; AMDGPU-NEXT: entry:
950 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
951 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
952 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
953 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
954 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
955 ; AMDGPU: common.ret:
956 ; AMDGPU-NEXT: ret void
957 ; AMDGPU: user_code.entry:
958 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
959 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
960 ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
961 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
962 ; AMDGPU-NEXT: br label [[COMMON_RET]]
964 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
965 ; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
967 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
968 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
969 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
970 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
971 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
973 ; NVPTX-NEXT: ret void
974 ; NVPTX: user_code.entry:
975 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
976 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
977 ; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
978 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
979 ; NVPTX-NEXT: br label [[COMMON_RET]]
981 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
982 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
983 ; AMDGPU-DISABLED1-NEXT: entry:
984 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
985 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
986 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
987 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
988 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
989 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
990 ; AMDGPU-DISABLED1: is_worker_check:
991 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
992 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
993 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
994 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
995 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
996 ; AMDGPU-DISABLED1: worker_state_machine.begin:
997 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
998 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
999 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1000 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1001 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1002 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1003 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1004 ; AMDGPU-DISABLED1: worker_state_machine.finished:
1005 ; AMDGPU-DISABLED1-NEXT: ret void
1006 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
1007 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1008 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
1009 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1010 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
1011 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1012 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1013 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
1014 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1015 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
1016 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1017 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1018 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
1019 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1020 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1021 ; AMDGPU-DISABLED1: thread.user_code.check:
1022 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1023 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1024 ; AMDGPU-DISABLED1: common.ret:
1025 ; AMDGPU-DISABLED1-NEXT: ret void
1026 ; AMDGPU-DISABLED1: user_code.entry:
1027 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1028 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1029 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1030 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1031 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
1033 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
1034 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1035 ; AMDGPU-DISABLED2-NEXT: entry:
1036 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1037 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1038 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1039 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
1040 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1041 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1042 ; AMDGPU-DISABLED2: is_worker_check:
1043 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1044 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1045 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1046 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1047 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1048 ; AMDGPU-DISABLED2: worker_state_machine.begin:
1049 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1050 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1051 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1052 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1053 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1054 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1055 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1056 ; AMDGPU-DISABLED2: worker_state_machine.finished:
1057 ; AMDGPU-DISABLED2-NEXT: ret void
1058 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
1059 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1060 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
1061 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1062 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
1063 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1064 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1065 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
1066 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1067 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
1068 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1069 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1070 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
1071 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1072 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1073 ; AMDGPU-DISABLED2: thread.user_code.check:
1074 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1075 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1076 ; AMDGPU-DISABLED2: common.ret:
1077 ; AMDGPU-DISABLED2-NEXT: ret void
1078 ; AMDGPU-DISABLED2: user_code.entry:
1079 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1080 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1081 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1082 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1083 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
1085 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
1086 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1087 ; NVPTX-DISABLED1-NEXT: entry:
1088 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1089 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1090 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1091 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
1092 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1093 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1094 ; NVPTX-DISABLED1: is_worker_check:
1095 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1096 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1097 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1098 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1099 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1100 ; NVPTX-DISABLED1: worker_state_machine.begin:
1101 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1102 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1103 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1104 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1105 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1106 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1107 ; NVPTX-DISABLED1: worker_state_machine.finished:
1108 ; NVPTX-DISABLED1-NEXT: ret void
1109 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
1110 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1111 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
1112 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1113 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
1114 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1115 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1116 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
1117 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1118 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
1119 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1120 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1121 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
1122 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1123 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1124 ; NVPTX-DISABLED1: thread.user_code.check:
1125 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1126 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1127 ; NVPTX-DISABLED1: common.ret:
1128 ; NVPTX-DISABLED1-NEXT: ret void
1129 ; NVPTX-DISABLED1: user_code.entry:
1130 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1131 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1132 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1133 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1134 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
1136 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
1137 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1138 ; NVPTX-DISABLED2-NEXT: entry:
1139 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1140 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1141 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1142 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
1143 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1144 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1145 ; NVPTX-DISABLED2: is_worker_check:
1146 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1147 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1148 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1149 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1150 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1151 ; NVPTX-DISABLED2: worker_state_machine.begin:
1152 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1153 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1154 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1155 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1156 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1157 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1158 ; NVPTX-DISABLED2: worker_state_machine.finished:
1159 ; NVPTX-DISABLED2-NEXT: ret void
1160 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
1161 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1162 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
1163 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1164 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
1165 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1166 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1167 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
1168 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1169 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
1170 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1171 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1172 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
1173 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1174 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1175 ; NVPTX-DISABLED2: thread.user_code.check:
1176 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1177 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1178 ; NVPTX-DISABLED2: common.ret:
1179 ; NVPTX-DISABLED2-NEXT: ret void
1180 ; NVPTX-DISABLED2: user_code.entry:
1181 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1182 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1183 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1184 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1185 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
1187 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
1188 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
1189 ; AMDGPU-DISABLED-NEXT: entry:
1190 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1191 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1192 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1193 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
1194 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1195 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1196 ; AMDGPU-DISABLED: is_worker_check:
1197 ; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1198 ; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1199 ; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1200 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1201 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1202 ; AMDGPU-DISABLED: worker_state_machine.begin:
1203 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1204 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1205 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1206 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1207 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1208 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1209 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1210 ; AMDGPU-DISABLED: worker_state_machine.finished:
1211 ; AMDGPU-DISABLED-NEXT: ret void
1212 ; AMDGPU-DISABLED: worker_state_machine.is_active.check:
1213 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1214 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
1215 ; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
1216 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
1217 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
1218 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1219 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1220 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
1221 ; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
1222 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1223 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
1224 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
1225 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1226 ; AMDGPU-DISABLED: worker_state_machine.done.barrier:
1227 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1228 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1229 ; AMDGPU-DISABLED: thread.user_code.check:
1230 ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1231 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1232 ; AMDGPU-DISABLED: common.ret:
1233 ; AMDGPU-DISABLED-NEXT: ret void
1234 ; AMDGPU-DISABLED: user_code.entry:
1235 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1236 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1237 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
1238 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
1239 ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
1240 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
1241 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
1242 ; NVPTX-DISABLED-NEXT: entry:
1243 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1244 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1245 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1246 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
1247 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1248 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1249 ; NVPTX-DISABLED: is_worker_check:
1250 ; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1251 ; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1252 ; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1253 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1254 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1255 ; NVPTX-DISABLED: worker_state_machine.begin:
1256 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1257 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1258 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1259 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1260 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1261 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1262 ; NVPTX-DISABLED: worker_state_machine.finished:
1263 ; NVPTX-DISABLED-NEXT: ret void
1264 ; NVPTX-DISABLED: worker_state_machine.is_active.check:
1265 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1266 ; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
1267 ; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
1268 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
1269 ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
1270 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1271 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1272 ; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
1273 ; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
1274 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1275 ; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
1276 ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
1277 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1278 ; NVPTX-DISABLED: worker_state_machine.done.barrier:
1279 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1280 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1281 ; NVPTX-DISABLED: thread.user_code.check:
1282 ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1283 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1284 ; NVPTX-DISABLED: common.ret:
1285 ; NVPTX-DISABLED-NEXT: ret void
1286 ; NVPTX-DISABLED: user_code.entry:
1287 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1288 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1289 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
1290 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
1291 ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
1293 %.zero.addr = alloca i32, align 4
1294 %.threadid_temp. = alloca i32, align 4
1295 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
1296 %exec_user_code = icmp eq i32 %0, -1
1297 br i1 %exec_user_code, label %user_code.entry, label %common.ret
1299 common.ret: ; preds = %entry, %user_code.entry
ret void
1302 user_code.entry: ; preds = %entry
1303 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
1304 store i32 0, ptr %.zero.addr, align 4
1305 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
1306 call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6
1307 call void @__kmpc_target_deinit()
1308 br label %common.ret
}
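;; @__omp_outlined__2 allocates x with __kmpc_alloc_shared in the input IR (see
;; the body below) and passes it to @use, which does not capture it; the checks
;; therefore expect the shared allocation to be replaced by a private alloca
;; ([[X_H2S]]) on both targets and in the disabled runs as well.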
1311 ; Function Attrs: alwaysinline convergent norecurse nounwind
1312 define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
1313 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
1314 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1315 ; AMDGPU-NEXT: entry:
1316 ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1317 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1318 ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1319 ; AMDGPU-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
1320 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
1322 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1323 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1324 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1325 ; AMDGPU: for.cond.cleanup:
1326 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
1327 ; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
1329 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1330 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1331 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1332 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
1334 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
1335 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1336 ; NVPTX-NEXT: entry:
1337 ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1338 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1339 ; NVPTX-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
1340 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
1342 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1343 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1344 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1345 ; NVPTX: for.cond.cleanup:
1346 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
1347 ; NVPTX-NEXT: ret void
; NVPTX: for.body:
1349 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1350 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1351 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1352 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
1354 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1355 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1356 ; AMDGPU-DISABLED1-NEXT: entry:
1357 ; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1358 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1359 ; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1360 ; AMDGPU-DISABLED1-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
1361 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1362 ; AMDGPU-DISABLED1: for.cond:
1363 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1364 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1365 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1366 ; AMDGPU-DISABLED1: for.cond.cleanup:
1367 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1368 ; AMDGPU-DISABLED1-NEXT: ret void
1369 ; AMDGPU-DISABLED1: for.body:
1370 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1371 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1372 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1373 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
1375 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1376 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1377 ; AMDGPU-DISABLED2-NEXT: entry:
1378 ; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1379 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1380 ; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1381 ; AMDGPU-DISABLED2-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
1382 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1383 ; AMDGPU-DISABLED2: for.cond:
1384 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1385 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1386 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1387 ; AMDGPU-DISABLED2: for.cond.cleanup:
1388 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1389 ; AMDGPU-DISABLED2-NEXT: ret void
1390 ; AMDGPU-DISABLED2: for.body:
1391 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1392 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1393 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1394 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
1396 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1397 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1398 ; NVPTX-DISABLED1-NEXT: entry:
1399 ; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1400 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1401 ; NVPTX-DISABLED1-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
1402 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1403 ; NVPTX-DISABLED1: for.cond:
1404 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1405 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1406 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1407 ; NVPTX-DISABLED1: for.cond.cleanup:
1408 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1409 ; NVPTX-DISABLED1-NEXT: ret void
1410 ; NVPTX-DISABLED1: for.body:
1411 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1412 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1413 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1414 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
1416 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1417 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1418 ; NVPTX-DISABLED2-NEXT: entry:
1419 ; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1420 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1421 ; NVPTX-DISABLED2-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
1422 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1423 ; NVPTX-DISABLED2: for.cond:
1424 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1425 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1426 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1427 ; NVPTX-DISABLED2: for.cond.cleanup:
1428 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1429 ; NVPTX-DISABLED2-NEXT: ret void
1430 ; NVPTX-DISABLED2: for.body:
1431 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
1432 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1433 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1434 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
entry:
  %captured_vars_addrs = alloca [0 x ptr], align 8
  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
  call void @use(ptr nocapture %x) #10
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(ptr %x, i64 4)
  ret void

for.body:                                         ; preds = %for.cond
  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !25
}
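;; __omp_outlined__3 is the body of the parallel region launched by the __kmpc_parallel_51 calls above; it captures no variables ([0 x ptr], i64 0) and only calls @unknown.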
1497 ; Function Attrs: alwaysinline convergent norecurse nounwind
1498 define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
1499 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
1500 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1501 ; AMDGPU-NEXT: entry:
1502 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
1503 ; AMDGPU-NEXT: ret void
1505 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
1506 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1507 ; NVPTX-NEXT: entry:
1508 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
1509 ; NVPTX-NEXT: ret void
1511 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1512 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1513 ; AMDGPU-DISABLED1-NEXT: entry:
1514 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1515 ; AMDGPU-DISABLED1-NEXT: ret void
1517 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1518 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1519 ; AMDGPU-DISABLED2-NEXT: entry:
1520 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1521 ; AMDGPU-DISABLED2-NEXT: ret void
1523 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1524 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1525 ; NVPTX-DISABLED1-NEXT: entry:
1526 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1527 ; NVPTX-DISABLED1-NEXT: ret void
1529 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1530 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1531 ; NVPTX-DISABLED2-NEXT: entry:
1532 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1533 ; NVPTX-DISABLED2-NEXT: ret void
entry:
  call void @unknown() #11
  ret void
}
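;; Wrapper used by the runtime to launch __omp_outlined__3: it retrieves the (empty) captured-variable buffer with __kmpc_get_shared_variables and forwards the call.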
1550 ; Function Attrs: convergent norecurse nounwind
1551 define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
1552 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1553 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1554 ; AMDGPU-NEXT: entry:
1555 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1556 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1557 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1558 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1559 ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1560 ; AMDGPU-NEXT: ret void
1562 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1563 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1564 ; NVPTX-NEXT: entry:
1565 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1566 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1567 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1568 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1569 ; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1570 ; NVPTX-NEXT: ret void
1572 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1573 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1574 ; AMDGPU-DISABLED1-NEXT: entry:
1575 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1576 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1577 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1578 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1579 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1580 ; AMDGPU-DISABLED1-NEXT: ret void
1582 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1583 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1584 ; AMDGPU-DISABLED2-NEXT: entry:
1585 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1586 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1587 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1588 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1589 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1590 ; AMDGPU-DISABLED2-NEXT: ret void
1592 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1593 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1594 ; NVPTX-DISABLED1-NEXT: entry:
1595 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1596 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1597 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1598 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1599 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1600 ; NVPTX-DISABLED1-NEXT: ret void
1602 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1603 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1604 ; NVPTX-DISABLED2-NEXT: entry:
1605 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1606 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1607 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1608 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1609 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1610 ; NVPTX-DISABLED2-NEXT: ret void
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 %1, ptr %.addr1, align 4, !tbaa !18
  store i32 0, ptr %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6
  ret void
}
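;; Kernel for sequential_loop_to_shared_var() (l35). With SPMDization (AMDGPU/NVPTX prefixes) the kernel drops straight into user code; with it disabled (*-DISABLED1/2) the generic-mode worker state machine is kept and dispatches __omp_outlined__5_wrapper.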
1642 ; Function Attrs: alwaysinline convergent norecurse nounwind
1643 define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
1644 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1645 ; AMDGPU-SAME: () #[[ATTR0]] {
1646 ; AMDGPU-NEXT: entry:
1647 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1648 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1649 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1650 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1651 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1652 ; AMDGPU: common.ret:
1653 ; AMDGPU-NEXT: ret void
1654 ; AMDGPU: user_code.entry:
1655 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1656 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1657 ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1658 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
1659 ; AMDGPU-NEXT: br label [[COMMON_RET]]
1661 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1662 ; NVPTX-SAME: () #[[ATTR0]] {
1663 ; NVPTX-NEXT: entry:
1664 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1665 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1666 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1667 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1668 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1669 ; NVPTX: common.ret:
1670 ; NVPTX-NEXT: ret void
1671 ; NVPTX: user_code.entry:
1672 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1673 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1674 ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1675 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
1676 ; NVPTX-NEXT: br label [[COMMON_RET]]
1678 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1679 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
1680 ; AMDGPU-DISABLED1-NEXT: entry:
1681 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1682 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1683 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1684 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1685 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1686 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1687 ; AMDGPU-DISABLED1: is_worker_check:
1688 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1689 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1690 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1691 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1692 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1693 ; AMDGPU-DISABLED1: worker_state_machine.begin:
1694 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1695 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1696 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1697 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1698 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1699 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1700 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1701 ; AMDGPU-DISABLED1: worker_state_machine.finished:
1702 ; AMDGPU-DISABLED1-NEXT: ret void
1703 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
1704 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1705 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
1706 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1707 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
1708 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1709 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1710 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
1711 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1712 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
1713 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1714 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1715 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
1716 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1717 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1718 ; AMDGPU-DISABLED1: thread.user_code.check:
1719 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1720 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1721 ; AMDGPU-DISABLED1: common.ret:
1722 ; AMDGPU-DISABLED1-NEXT: ret void
1723 ; AMDGPU-DISABLED1: user_code.entry:
1724 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1725 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1726 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1727 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1728 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
1730 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1731 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1732 ; AMDGPU-DISABLED2-NEXT: entry:
1733 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1734 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1735 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1736 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1737 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1738 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1739 ; AMDGPU-DISABLED2: is_worker_check:
1740 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1741 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1742 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1743 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1744 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1745 ; AMDGPU-DISABLED2: worker_state_machine.begin:
1746 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1747 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1748 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1749 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1750 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1751 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1752 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1753 ; AMDGPU-DISABLED2: worker_state_machine.finished:
1754 ; AMDGPU-DISABLED2-NEXT: ret void
1755 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
1756 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1757 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
1758 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1759 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
1760 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1761 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1762 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
1763 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1764 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
1765 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1766 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1767 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
1768 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1769 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1770 ; AMDGPU-DISABLED2: thread.user_code.check:
1771 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1772 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1773 ; AMDGPU-DISABLED2: common.ret:
1774 ; AMDGPU-DISABLED2-NEXT: ret void
1775 ; AMDGPU-DISABLED2: user_code.entry:
1776 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1777 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1778 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1779 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1780 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
1782 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1783 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1784 ; NVPTX-DISABLED1-NEXT: entry:
1785 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1786 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1787 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1788 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1789 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1790 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1791 ; NVPTX-DISABLED1: is_worker_check:
1792 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1793 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1794 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1795 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1796 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1797 ; NVPTX-DISABLED1: worker_state_machine.begin:
1798 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1799 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1800 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1801 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1802 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1803 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1804 ; NVPTX-DISABLED1: worker_state_machine.finished:
1805 ; NVPTX-DISABLED1-NEXT: ret void
1806 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
1807 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1808 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
1809 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1810 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
1811 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1812 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1813 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
1814 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1815 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
1816 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1817 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1818 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
1819 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1820 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1821 ; NVPTX-DISABLED1: thread.user_code.check:
1822 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1823 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1824 ; NVPTX-DISABLED1: common.ret:
1825 ; NVPTX-DISABLED1-NEXT: ret void
1826 ; NVPTX-DISABLED1: user_code.entry:
1827 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1828 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1829 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1830 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1831 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
1833 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1834 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1835 ; NVPTX-DISABLED2-NEXT: entry:
1836 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1837 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1838 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1839 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1840 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1841 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1842 ; NVPTX-DISABLED2: is_worker_check:
1843 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1844 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1845 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1846 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1847 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1848 ; NVPTX-DISABLED2: worker_state_machine.begin:
1849 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1850 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1851 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1852 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
1853 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1854 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1855 ; NVPTX-DISABLED2: worker_state_machine.finished:
1856 ; NVPTX-DISABLED2-NEXT: ret void
1857 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
1858 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1859 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
1860 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1861 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
1862 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1863 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1864 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
1865 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1866 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
1867 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1868 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1869 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
1870 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1871 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1872 ; NVPTX-DISABLED2: thread.user_code.check:
1873 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1874 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1875 ; NVPTX-DISABLED2: common.ret:
1876 ; NVPTX-DISABLED2-NEXT: ret void
1877 ; NVPTX-DISABLED2: user_code.entry:
1878 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1879 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
1880 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1881 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1882 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret:                                       ; preds = %entry, %user_code.entry
  ret void

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
  store i32 0, ptr %.zero.addr, align 4
  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6
  call void @__kmpc_target_deinit()
  br label %common.ret
}
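;; __omp_outlined__4 is the teams-region body of the kernel above. The __kmpc_alloc_shared/__kmpc_free_shared pair for x in the input is turned into the static addrspace(3) @x_shared buffer in every check prefix.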
2008 ; Function Attrs: alwaysinline convergent norecurse nounwind
2009 define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
2010 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
2011 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2012 ; AMDGPU-NEXT: entry:
2013 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2014 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
2016 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2017 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2018 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2019 ; AMDGPU: for.cond.cleanup:
2020 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
2021 ; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
2023 ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2024 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2025 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2026 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2027 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
2029 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
2030 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2031 ; NVPTX-NEXT: entry:
2032 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2033 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
2035 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2036 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2037 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2038 ; NVPTX: for.cond.cleanup:
2039 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
2040 ; NVPTX-NEXT: ret void
; NVPTX: for.body:
2042 ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2043 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2044 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2045 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2046 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
2048 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
2049 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2050 ; AMDGPU-DISABLED1-NEXT: entry:
2051 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2052 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2053 ; AMDGPU-DISABLED1: for.cond:
2054 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2055 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2056 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2057 ; AMDGPU-DISABLED1: for.cond.cleanup:
2058 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2059 ; AMDGPU-DISABLED1-NEXT: ret void
2060 ; AMDGPU-DISABLED1: for.body:
2061 ; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2062 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2063 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2064 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2065 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
2067 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
2068 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2069 ; AMDGPU-DISABLED2-NEXT: entry:
2070 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2071 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2072 ; AMDGPU-DISABLED2: for.cond:
2073 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2074 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2075 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2076 ; AMDGPU-DISABLED2: for.cond.cleanup:
2077 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2078 ; AMDGPU-DISABLED2-NEXT: ret void
2079 ; AMDGPU-DISABLED2: for.body:
2080 ; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2081 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2082 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2083 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2084 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
2086 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
2087 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2088 ; NVPTX-DISABLED1-NEXT: entry:
2089 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2090 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2091 ; NVPTX-DISABLED1: for.cond:
2092 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2093 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2094 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2095 ; NVPTX-DISABLED1: for.cond.cleanup:
2096 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2097 ; NVPTX-DISABLED1-NEXT: ret void
2098 ; NVPTX-DISABLED1: for.body:
2099 ; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2100 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2101 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2102 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2103 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
2105 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
2106 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2107 ; NVPTX-DISABLED2-NEXT: entry:
2108 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2109 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2110 ; NVPTX-DISABLED2: for.cond:
2111 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2112 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2113 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2114 ; NVPTX-DISABLED2: for.cond.cleanup:
2115 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2116 ; NVPTX-DISABLED2-NEXT: ret void
2117 ; NVPTX-DISABLED2: for.body:
2118 ; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
2119 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2120 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2121 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2122 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
entry:
  %captured_vars_addrs = alloca [1 x ptr], align 8
  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(ptr %x, i64 4)
  ret void

for.body:                                         ; preds = %for.cond
  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !28
}
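;; __omp_outlined__5 is the parallel region body: it increments the value behind the shared pointer x and then calls @unknown.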
2183 ; Function Attrs: alwaysinline convergent norecurse nounwind
2184 define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
2185 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
2186 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2187 ; AMDGPU-NEXT: entry:
2188 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2189 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2190 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2191 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
2192 ; AMDGPU-NEXT: ret void
2194 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
2195 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2196 ; NVPTX-NEXT: entry:
2197 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2198 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2199 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2200 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
2201 ; NVPTX-NEXT: ret void
2203 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
2204 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2205 ; AMDGPU-DISABLED1-NEXT: entry:
2206 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2207 ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2208 ; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2209 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
2210 ; AMDGPU-DISABLED1-NEXT: ret void
2212 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
2213 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2214 ; AMDGPU-DISABLED2-NEXT: entry:
2215 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2216 ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2217 ; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2218 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
2219 ; AMDGPU-DISABLED2-NEXT: ret void
2221 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
2222 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2223 ; NVPTX-DISABLED1-NEXT: entry:
2224 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2225 ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2226 ; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2227 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
2228 ; NVPTX-DISABLED1-NEXT: ret void
2230 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
2231 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2232 ; NVPTX-DISABLED2-NEXT: entry:
2233 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2234 ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2235 ; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2236 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
2237 ; NVPTX-DISABLED2-NEXT: ret void
2239 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
2240 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2241 ; AMDGPU-DISABLED-NEXT: entry:
2242 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2243 ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2244 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2245 ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
2246 ; AMDGPU-DISABLED-NEXT: ret void
2247 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
2248 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2249 ; NVPTX-DISABLED-NEXT: entry:
2250 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2251 ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2252 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2253 ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
2254 ; NVPTX-DISABLED-NEXT: ret void
2255 entry:
2256 %0 = load i32, ptr %x, align 4, !tbaa !18
2257 %inc = add nsw i32 %0, 1
2258 store i32 %inc, ptr %x, align 4, !tbaa !18
2259 call void @unknown() #11
2260 ret void
2261 }
2263 ; Function Attrs: convergent norecurse nounwind
2264 define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
2265 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2266 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2267 ; AMDGPU-NEXT: entry:
2268 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2269 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2270 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2271 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2272 ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2273 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2274 ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2275 ; AMDGPU-NEXT: ret void
2277 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2278 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2279 ; NVPTX-NEXT: entry:
2280 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2281 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2282 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2283 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2284 ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2285 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2286 ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2287 ; NVPTX-NEXT: ret void
2289 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2290 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2291 ; AMDGPU-DISABLED1-NEXT: entry:
2292 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2293 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2294 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2295 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2296 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2297 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2298 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2299 ; AMDGPU-DISABLED1-NEXT: ret void
2301 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2302 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2303 ; AMDGPU-DISABLED2-NEXT: entry:
2304 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2305 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2306 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2307 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2308 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2309 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2310 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2311 ; AMDGPU-DISABLED2-NEXT: ret void
2313 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2314 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2315 ; NVPTX-DISABLED1-NEXT: entry:
2316 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2317 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2318 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2319 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2320 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2321 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2322 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2323 ; NVPTX-DISABLED1-NEXT: ret void
2325 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2326 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2327 ; NVPTX-DISABLED2-NEXT: entry:
2328 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2329 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2330 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2331 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2332 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2333 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2334 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2335 ; NVPTX-DISABLED2-NEXT: ret void
2337 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2338 ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2339 ; AMDGPU-DISABLED-NEXT: entry:
2340 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2341 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2342 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2343 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2344 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2345 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2346 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2347 ; AMDGPU-DISABLED-NEXT: ret void
2348 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2349 ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2350 ; NVPTX-DISABLED-NEXT: entry:
2351 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2352 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2353 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2354 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2355 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2356 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
2357 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2358 ; NVPTX-DISABLED-NEXT: ret void
2359 entry:
2360 %.addr1 = alloca i32, align 4
2361 %.zero.addr = alloca i32, align 4
2362 %global_args = alloca ptr, align 8
2363 store i32 %1, ptr %.addr1, align 4, !tbaa !18
2364 store i32 0, ptr %.zero.addr, align 4
2365 call void @__kmpc_get_shared_variables(ptr %global_args)
2366 %2 = load ptr, ptr %global_args, align 8
2367 %3 = load ptr, ptr %2, align 8, !tbaa !26
2368 call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
2369 ret void
2370 }
2372 ; Function Attrs: alwaysinline convergent norecurse nounwind
2373 define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
2374 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2375 ; AMDGPU-SAME: () #[[ATTR0]] {
2376 ; AMDGPU-NEXT: entry:
2377 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2378 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2379 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2380 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2381 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2382 ; AMDGPU: common.ret:
2383 ; AMDGPU-NEXT: ret void
2384 ; AMDGPU: user_code.entry:
2385 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2386 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2387 ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2388 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
2389 ; AMDGPU-NEXT: br label [[COMMON_RET]]
2391 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2392 ; NVPTX-SAME: () #[[ATTR0]] {
2393 ; NVPTX-NEXT: entry:
2394 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2395 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2396 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2397 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2398 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2399 ; NVPTX: common.ret:
2400 ; NVPTX-NEXT: ret void
2401 ; NVPTX: user_code.entry:
2402 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2403 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2404 ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2405 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
2406 ; NVPTX-NEXT: br label [[COMMON_RET]]
2408 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2409 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
2410 ; AMDGPU-DISABLED1-NEXT: entry:
2411 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2412 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2413 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2414 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2415 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2416 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2417 ; AMDGPU-DISABLED1: is_worker_check:
2418 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2419 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2420 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2421 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2422 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2423 ; AMDGPU-DISABLED1: worker_state_machine.begin:
2424 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2425 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2426 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2427 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2428 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2429 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2430 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2431 ; AMDGPU-DISABLED1: worker_state_machine.finished:
2432 ; AMDGPU-DISABLED1-NEXT: ret void
2433 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
2434 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2435 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
2436 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2437 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
2438 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2439 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2440 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
2441 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2442 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
2443 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2444 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2445 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
2446 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2447 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2448 ; AMDGPU-DISABLED1: thread.user_code.check:
2449 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2450 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2451 ; AMDGPU-DISABLED1: common.ret:
2452 ; AMDGPU-DISABLED1-NEXT: ret void
2453 ; AMDGPU-DISABLED1: user_code.entry:
2454 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2455 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2456 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2457 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2458 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
2460 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2461 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
2462 ; AMDGPU-DISABLED2-NEXT: entry:
2463 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2464 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2465 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2466 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2467 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2468 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2469 ; AMDGPU-DISABLED2: is_worker_check:
2470 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2471 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2472 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2473 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2474 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2475 ; AMDGPU-DISABLED2: worker_state_machine.begin:
2476 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2477 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2478 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2479 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2480 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2481 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2482 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2483 ; AMDGPU-DISABLED2: worker_state_machine.finished:
2484 ; AMDGPU-DISABLED2-NEXT: ret void
2485 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
2486 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2487 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
2488 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2489 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
2490 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2491 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2492 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
2493 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2494 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
2495 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2496 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2497 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
2498 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2499 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2500 ; AMDGPU-DISABLED2: thread.user_code.check:
2501 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2502 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2503 ; AMDGPU-DISABLED2: common.ret:
2504 ; AMDGPU-DISABLED2-NEXT: ret void
2505 ; AMDGPU-DISABLED2: user_code.entry:
2506 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2507 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2508 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2509 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2510 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
2512 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2513 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
2514 ; NVPTX-DISABLED1-NEXT: entry:
2515 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2516 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2517 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2518 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2519 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2520 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2521 ; NVPTX-DISABLED1: is_worker_check:
2522 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2523 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2524 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2525 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2526 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2527 ; NVPTX-DISABLED1: worker_state_machine.begin:
2528 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2529 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2530 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2531 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2532 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2533 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2534 ; NVPTX-DISABLED1: worker_state_machine.finished:
2535 ; NVPTX-DISABLED1-NEXT: ret void
2536 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
2537 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2538 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
2539 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2540 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
2541 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2542 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2543 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
2544 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2545 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
2546 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2547 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2548 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
2549 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2550 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2551 ; NVPTX-DISABLED1: thread.user_code.check:
2552 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2553 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2554 ; NVPTX-DISABLED1: common.ret:
2555 ; NVPTX-DISABLED1-NEXT: ret void
2556 ; NVPTX-DISABLED1: user_code.entry:
2557 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2558 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2559 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2560 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2561 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
2563 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2564 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
2565 ; NVPTX-DISABLED2-NEXT: entry:
2566 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2567 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2568 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2569 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2570 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2571 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2572 ; NVPTX-DISABLED2: is_worker_check:
2573 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2574 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2575 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2576 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2577 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2578 ; NVPTX-DISABLED2: worker_state_machine.begin:
2579 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2580 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2581 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2582 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2583 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2584 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2585 ; NVPTX-DISABLED2: worker_state_machine.finished:
2586 ; NVPTX-DISABLED2-NEXT: ret void
2587 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
2588 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2589 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
2590 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2591 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
2592 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2593 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2594 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
2595 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2596 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
2597 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2598 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2599 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
2600 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2601 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2602 ; NVPTX-DISABLED2: thread.user_code.check:
2603 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2604 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2605 ; NVPTX-DISABLED2: common.ret:
2606 ; NVPTX-DISABLED2-NEXT: ret void
2607 ; NVPTX-DISABLED2: user_code.entry:
2608 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2609 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2610 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2611 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2612 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
2614 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2615 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
2616 ; AMDGPU-DISABLED-NEXT: entry:
2617 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2618 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2619 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2620 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
2621 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2622 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2623 ; AMDGPU-DISABLED: is_worker_check:
2624 ; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2625 ; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2626 ; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2627 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2628 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2629 ; AMDGPU-DISABLED: worker_state_machine.begin:
2630 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2631 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2632 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2633 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2634 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2635 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2636 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2637 ; AMDGPU-DISABLED: worker_state_machine.finished:
2638 ; AMDGPU-DISABLED-NEXT: ret void
2639 ; AMDGPU-DISABLED: worker_state_machine.is_active.check:
2640 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2641 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
2642 ; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
2643 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2644 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
2645 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2646 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2647 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
2648 ; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
2649 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2650 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
2651 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
2652 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2653 ; AMDGPU-DISABLED: worker_state_machine.done.barrier:
2654 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2655 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2656 ; AMDGPU-DISABLED: thread.user_code.check:
2657 ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2658 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2659 ; AMDGPU-DISABLED: common.ret:
2660 ; AMDGPU-DISABLED-NEXT: ret void
2661 ; AMDGPU-DISABLED: user_code.entry:
2662 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2663 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2664 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
2665 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
2666 ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
2667 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
2668 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
2669 ; NVPTX-DISABLED-NEXT: entry:
2670 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2671 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2672 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2673 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
2674 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2675 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2676 ; NVPTX-DISABLED: is_worker_check:
2677 ; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2678 ; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2679 ; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2680 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2681 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2682 ; NVPTX-DISABLED: worker_state_machine.begin:
2683 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2684 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2685 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2686 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
2687 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2688 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2689 ; NVPTX-DISABLED: worker_state_machine.finished:
2690 ; NVPTX-DISABLED-NEXT: ret void
2691 ; NVPTX-DISABLED: worker_state_machine.is_active.check:
2692 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2693 ; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
2694 ; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
2695 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2696 ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
2697 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2698 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2699 ; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
2700 ; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
2701 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2702 ; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
2703 ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
2704 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2705 ; NVPTX-DISABLED: worker_state_machine.done.barrier:
2706 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2707 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2708 ; NVPTX-DISABLED: thread.user_code.check:
2709 ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2710 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2711 ; NVPTX-DISABLED: common.ret:
2712 ; NVPTX-DISABLED-NEXT: ret void
2713 ; NVPTX-DISABLED: user_code.entry:
2714 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2715 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
2716 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
2717 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
2718 ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
2719 entry:
2720 %.zero.addr = alloca i32, align 4
2721 %.threadid_temp. = alloca i32, align 4
2722 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2723 %exec_user_code = icmp eq i32 %0, -1
2724 br i1 %exec_user_code, label %user_code.entry, label %common.ret
2726 common.ret: ; preds = %entry, %user_code.entry
2727 ret void
2729 user_code.entry: ; preds = %entry
2730 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
2731 store i32 0, ptr %.zero.addr, align 4
2732 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
2733 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
2734 call void @__kmpc_target_deinit()
2735 br label %common.ret
2736 }
2738 ; Function Attrs: alwaysinline convergent norecurse nounwind
2739 define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
2740 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
2741 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2742 ; AMDGPU-NEXT: entry:
2743 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2744 ; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
2745 ; AMDGPU: region.check.tid:
2746 ; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2747 ; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2748 ; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2749 ; AMDGPU: region.guarded:
2750 ; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
2751 ; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
2752 ; AMDGPU: region.guarded.end:
2753 ; AMDGPU-NEXT: br label [[REGION_BARRIER]]
2754 ; AMDGPU: region.barrier:
2755 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2756 ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]]
2757 ; AMDGPU: region.exit:
2758 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
2759 ; AMDGPU: for.cond:
2760 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2761 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2762 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2763 ; AMDGPU: for.cond.cleanup:
2764 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
2765 ; AMDGPU-NEXT: ret void
2766 ; AMDGPU: for.body:
2767 ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2768 ; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2769 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2770 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2771 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2773 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
2774 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2775 ; NVPTX-NEXT: entry:
2776 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2777 ; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
2778 ; NVPTX: region.check.tid:
2779 ; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2780 ; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2781 ; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2782 ; NVPTX: region.guarded:
2783 ; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
2784 ; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
2785 ; NVPTX: region.guarded.end:
2786 ; NVPTX-NEXT: br label [[REGION_BARRIER]]
2787 ; NVPTX: region.barrier:
2788 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2789 ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]]
2790 ; NVPTX: region.exit:
2791 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
2792 ; NVPTX: for.cond:
2793 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2794 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2795 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2796 ; NVPTX: for.cond.cleanup:
2797 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
2798 ; NVPTX-NEXT: ret void
2799 ; NVPTX: for.body:
2800 ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2801 ; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2802 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2803 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2804 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2806 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2807 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2808 ; AMDGPU-DISABLED1-NEXT: entry:
2809 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2810 ; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
2811 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2812 ; AMDGPU-DISABLED1: for.cond:
2813 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2814 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2815 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2816 ; AMDGPU-DISABLED1: for.cond.cleanup:
2817 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2818 ; AMDGPU-DISABLED1-NEXT: ret void
2819 ; AMDGPU-DISABLED1: for.body:
2820 ; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2821 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2822 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2823 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2824 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2826 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2827 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2828 ; AMDGPU-DISABLED2-NEXT: entry:
2829 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2830 ; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
2831 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2832 ; AMDGPU-DISABLED2: for.cond:
2833 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2834 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2835 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2836 ; AMDGPU-DISABLED2: for.cond.cleanup:
2837 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2838 ; AMDGPU-DISABLED2-NEXT: ret void
2839 ; AMDGPU-DISABLED2: for.body:
2840 ; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2841 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2842 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2843 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2844 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2846 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2847 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2848 ; NVPTX-DISABLED1-NEXT: entry:
2849 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2850 ; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
2851 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2852 ; NVPTX-DISABLED1: for.cond:
2853 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2854 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2855 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2856 ; NVPTX-DISABLED1: for.cond.cleanup:
2857 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2858 ; NVPTX-DISABLED1-NEXT: ret void
2859 ; NVPTX-DISABLED1: for.body:
2860 ; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2861 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2862 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2863 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2864 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2866 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2867 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2868 ; NVPTX-DISABLED2-NEXT: entry:
2869 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2870 ; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
2871 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2872 ; NVPTX-DISABLED2: for.cond:
2873 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2874 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2875 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2876 ; NVPTX-DISABLED2: for.cond.cleanup:
2877 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2878 ; NVPTX-DISABLED2-NEXT: ret void
2879 ; NVPTX-DISABLED2: for.body:
2880 ; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2881 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2882 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2883 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2884 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2886 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
2887 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2888 ; AMDGPU-DISABLED-NEXT: entry:
2889 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2890 ; AMDGPU-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
2891 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
2892 ; AMDGPU-DISABLED: for.cond:
2893 ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2894 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2895 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2896 ; AMDGPU-DISABLED: for.cond.cleanup:
2897 ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
2898 ; AMDGPU-DISABLED-NEXT: ret void
2899 ; AMDGPU-DISABLED: for.body:
2900 ; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2901 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2902 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2903 ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2904 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2905 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
2906 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2907 ; NVPTX-DISABLED-NEXT: entry:
2908 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2909 ; NVPTX-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
2910 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
2911 ; NVPTX-DISABLED: for.cond:
2912 ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2913 ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2914 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2915 ; NVPTX-DISABLED: for.cond.cleanup:
2916 ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
2917 ; NVPTX-DISABLED-NEXT: ret void
2918 ; NVPTX-DISABLED: for.body:
2919 ; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
2920 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
2921 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2922 ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2923 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
2924 entry:
2925 %captured_vars_addrs = alloca [1 x ptr], align 8
2926 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
2927 store i32 42, ptr %x, align 4, !tbaa !18
2928 br label %for.cond
2930 for.cond: ; preds = %for.body, %entry
2931 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
2932 %cmp = icmp slt i32 %i.0, 100
2933 br i1 %cmp, label %for.body, label %for.cond.cleanup
2935 for.cond.cleanup: ; preds = %for.cond
2936 call void @spmd_amenable() #10
2937 call void @__kmpc_free_shared(ptr %x, i64 4)
2938 ret void
2940 for.body: ; preds = %for.cond
2941 store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
2942 %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
2943 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1)
2944 %inc = add nsw i32 %i.0, 1
2945 br label %for.cond, !llvm.loop !29
2946 }
2948 ; Function Attrs: alwaysinline convergent norecurse nounwind
2949 define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
2950 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
2951 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2952 ; AMDGPU-NEXT: entry:
2953 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2954 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2955 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2956 ; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2957 ; AMDGPU-NEXT: ret void
2959 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
2960 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2961 ; NVPTX-NEXT: entry:
2962 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2963 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2964 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2965 ; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2966 ; NVPTX-NEXT: ret void
2968 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2969 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2970 ; AMDGPU-DISABLED1-NEXT: entry:
2971 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2972 ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2973 ; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2974 ; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2975 ; AMDGPU-DISABLED1-NEXT: ret void
2977 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2978 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2979 ; AMDGPU-DISABLED2-NEXT: entry:
2980 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2981 ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2982 ; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2983 ; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2984 ; AMDGPU-DISABLED2-NEXT: ret void
2986 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2987 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2988 ; NVPTX-DISABLED1-NEXT: entry:
2989 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2990 ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2991 ; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
2992 ; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2993 ; NVPTX-DISABLED1-NEXT: ret void
2995 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2996 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2997 ; NVPTX-DISABLED2-NEXT: entry:
2998 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
2999 ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
3000 ; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
3001 ; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
3002 ; NVPTX-DISABLED2-NEXT: ret void
3021 %0 = load i32, ptr %x, align 4, !tbaa !18
3022 %inc = add nsw i32 %0, 1
3023 store i32 %inc, ptr %x, align 4, !tbaa !18
3024 call void @unknowni32p(ptr %x) #11
3025 ret void
3026 }
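; __omp_outlined__7_wrapper is what a generic-mode worker invokes for this parallel region: it
; fetches the captured-variable array with @__kmpc_get_shared_variables, loads the single captured
; pointer out of it, and forwards that pointer to @__omp_outlined__7.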
3028 ; Function Attrs: convergent norecurse nounwind
3029 define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
3030 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3031 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3032 ; AMDGPU-NEXT: entry:
3033 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3034 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3035 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3036 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3037 ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3038 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3039 ; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3040 ; AMDGPU-NEXT: ret void
3042 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3043 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3044 ; NVPTX-NEXT: entry:
3045 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3046 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3047 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3048 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3049 ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3050 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3051 ; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3052 ; NVPTX-NEXT: ret void
3054 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3055 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3056 ; AMDGPU-DISABLED1-NEXT: entry:
3057 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3058 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3059 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3060 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3061 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3062 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3063 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3064 ; AMDGPU-DISABLED1-NEXT: ret void
3066 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3067 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3068 ; AMDGPU-DISABLED2-NEXT: entry:
3069 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3070 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3071 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3072 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3073 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3074 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3075 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3076 ; AMDGPU-DISABLED2-NEXT: ret void
3078 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3079 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3080 ; NVPTX-DISABLED1-NEXT: entry:
3081 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3082 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3083 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3084 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3085 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3086 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3087 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3088 ; NVPTX-DISABLED1-NEXT: ret void
3090 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3091 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3092 ; NVPTX-DISABLED2-NEXT: entry:
3093 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3094 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3095 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3096 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3097 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
3098 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
3099 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
3100 ; NVPTX-DISABLED2-NEXT: ret void
3125 %.addr1 = alloca i32, align 4
3126 %.zero.addr = alloca i32, align 4
3127 %global_args = alloca ptr, align 8
3128 store i32 %1, ptr %.addr1, align 4, !tbaa !18
3129 store i32 0, ptr %.zero.addr, align 4
3130 call void @__kmpc_get_shared_variables(ptr %global_args)
3131 %2 = load ptr, ptr %global_args, align 8
3132 %3 = load ptr, ptr %2, align 8, !tbaa !26
3133 call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
3134 ret void
3135 }
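; For the do_not_spmdize_target kernel below, every prefix (including the plain AMDGPU/NVPTX runs
; with SPMD-ization enabled) checks for the full generic-mode worker state machine, i.e.
; SPMD-ization is not applied here; the outlined body only calls the opaque @unknown.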
3137 ; Function Attrs: alwaysinline convergent norecurse nounwind
3138 define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
3139 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3140 ; AMDGPU-SAME: () #[[ATTR0]] {
3141 ; AMDGPU-NEXT: entry:
3142 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3143 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3144 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3145 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3146 ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3147 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3148 ; AMDGPU: is_worker_check:
3149 ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3150 ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3151 ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3152 ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3153 ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3154 ; AMDGPU: worker_state_machine.begin:
3155 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3156 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3157 ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3158 ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3159 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3160 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3161 ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3162 ; AMDGPU: worker_state_machine.finished:
3163 ; AMDGPU-NEXT: ret void
3164 ; AMDGPU: worker_state_machine.is_active.check:
3165 ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3166 ; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
3167 ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3168 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3169 ; AMDGPU: worker_state_machine.parallel_region.end:
3170 ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
3171 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3172 ; AMDGPU: worker_state_machine.done.barrier:
3173 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3174 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3175 ; AMDGPU: thread.user_code.check:
3176 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3177 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3178 ; AMDGPU: common.ret:
3179 ; AMDGPU-NEXT: ret void
3180 ; AMDGPU: user_code.entry:
3181 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3182 ; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3183 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
3184 ; AMDGPU-NEXT: br label [[COMMON_RET]]
3186 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3187 ; NVPTX-SAME: () #[[ATTR0]] {
3188 ; NVPTX-NEXT: entry:
3189 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3190 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3191 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3192 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3193 ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3194 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3195 ; NVPTX: is_worker_check:
3196 ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3197 ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3198 ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3199 ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3200 ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3201 ; NVPTX: worker_state_machine.begin:
3202 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3203 ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3204 ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3205 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3206 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3207 ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3208 ; NVPTX: worker_state_machine.finished:
3209 ; NVPTX-NEXT: ret void
3210 ; NVPTX: worker_state_machine.is_active.check:
3211 ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3212 ; NVPTX: worker_state_machine.parallel_region.fallback.execute:
3213 ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3214 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3215 ; NVPTX: worker_state_machine.parallel_region.end:
3216 ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
3217 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3218 ; NVPTX: worker_state_machine.done.barrier:
3219 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3220 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3221 ; NVPTX: thread.user_code.check:
3222 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3223 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3224 ; NVPTX: common.ret:
3225 ; NVPTX-NEXT: ret void
3226 ; NVPTX: user_code.entry:
3227 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3228 ; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3229 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
3230 ; NVPTX-NEXT: br label [[COMMON_RET]]
3232 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3233 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
3234 ; AMDGPU-DISABLED1-NEXT: entry:
3235 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3236 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3237 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3238 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3239 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3240 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3241 ; AMDGPU-DISABLED1: is_worker_check:
3242 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3243 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3244 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3245 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3246 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3247 ; AMDGPU-DISABLED1: worker_state_machine.begin:
3248 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3249 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3250 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3251 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3252 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3253 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3254 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3255 ; AMDGPU-DISABLED1: worker_state_machine.finished:
3256 ; AMDGPU-DISABLED1-NEXT: ret void
3257 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
3258 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3259 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
3260 ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3261 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3262 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
3263 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
3264 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3265 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
3266 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3267 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3268 ; AMDGPU-DISABLED1: thread.user_code.check:
3269 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3270 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3271 ; AMDGPU-DISABLED1: common.ret:
3272 ; AMDGPU-DISABLED1-NEXT: ret void
3273 ; AMDGPU-DISABLED1: user_code.entry:
3274 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3275 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3276 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
3277 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
3279 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3280 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
3281 ; AMDGPU-DISABLED2-NEXT: entry:
3282 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3283 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3284 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3285 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3286 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3287 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3288 ; AMDGPU-DISABLED2: is_worker_check:
3289 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3290 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3291 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3292 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3293 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3294 ; AMDGPU-DISABLED2: worker_state_machine.begin:
3295 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3296 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3297 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3298 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3299 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3300 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3301 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3302 ; AMDGPU-DISABLED2: worker_state_machine.finished:
3303 ; AMDGPU-DISABLED2-NEXT: ret void
3304 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
3305 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3306 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
3307 ; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3308 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3309 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
3310 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
3311 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3312 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
3313 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3314 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3315 ; AMDGPU-DISABLED2: thread.user_code.check:
3316 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3317 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3318 ; AMDGPU-DISABLED2: common.ret:
3319 ; AMDGPU-DISABLED2-NEXT: ret void
3320 ; AMDGPU-DISABLED2: user_code.entry:
3321 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3322 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3323 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
3324 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
3326 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3327 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
3328 ; NVPTX-DISABLED1-NEXT: entry:
3329 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3330 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3331 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3332 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3333 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3334 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3335 ; NVPTX-DISABLED1: is_worker_check:
3336 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3337 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3338 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3339 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3340 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3341 ; NVPTX-DISABLED1: worker_state_machine.begin:
3342 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3343 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3344 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3345 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3346 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3347 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3348 ; NVPTX-DISABLED1: worker_state_machine.finished:
3349 ; NVPTX-DISABLED1-NEXT: ret void
3350 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
3351 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3352 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
3353 ; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3354 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3355 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
3356 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
3357 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3358 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
3359 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3360 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3361 ; NVPTX-DISABLED1: thread.user_code.check:
3362 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3363 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3364 ; NVPTX-DISABLED1: common.ret:
3365 ; NVPTX-DISABLED1-NEXT: ret void
3366 ; NVPTX-DISABLED1: user_code.entry:
3367 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3368 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3369 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
3370 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
3372 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
3373 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
3374 ; NVPTX-DISABLED2-NEXT: entry:
3375 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3376 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3377 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3378 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3379 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3380 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3381 ; NVPTX-DISABLED2: is_worker_check:
3382 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3383 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3384 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3385 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3386 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3387 ; NVPTX-DISABLED2: worker_state_machine.begin:
3388 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3389 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3390 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3391 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3392 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3393 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3394 ; NVPTX-DISABLED2: worker_state_machine.finished:
3395 ; NVPTX-DISABLED2-NEXT: ret void
3396 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
3397 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3398 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
3399 ; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3400 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3401 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
3402 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
3403 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3404 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
3405 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3406 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3407 ; NVPTX-DISABLED2: thread.user_code.check:
3408 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3409 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3410 ; NVPTX-DISABLED2: common.ret:
3411 ; NVPTX-DISABLED2-NEXT: ret void
3412 ; NVPTX-DISABLED2: user_code.entry:
3413 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3414 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3415 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
3416 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
3510 %.zero.addr = alloca i32, align 4
3511 %.threadid_temp. = alloca i32, align 4
3512 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
3513 %exec_user_code = icmp eq i32 %0, -1
3514 br i1 %exec_user_code, label %user_code.entry, label %common.ret
3516 common.ret: ; preds = %entry, %user_code.entry
3517 ret void
3519 user_code.entry: ; preds = %entry
3520 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
3521 store i32 0, ptr %.zero.addr, align 4
3522 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
3523 call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6
3524 call void @__kmpc_target_deinit()
3525 br label %common.ret
3526 }
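; __omp_outlined__8 is the outlined target region for the kernel above; its only side effect is
; the call to the external @unknown, which every prefix expects to remain in place.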
3528 ; Function Attrs: alwaysinline convergent norecurse nounwind
3529 define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
3530 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
3531 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3532 ; AMDGPU-NEXT: entry:
3533 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
3534 ; AMDGPU-NEXT: ret void
3536 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
3537 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3538 ; NVPTX-NEXT: entry:
3539 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
3540 ; NVPTX-NEXT: ret void
3542 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
3543 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3544 ; AMDGPU-DISABLED1-NEXT: entry:
3545 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
3546 ; AMDGPU-DISABLED1-NEXT: ret void
3548 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
3549 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3550 ; AMDGPU-DISABLED2-NEXT: entry:
3551 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
3552 ; AMDGPU-DISABLED2-NEXT: ret void
3554 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
3555 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3556 ; NVPTX-DISABLED1-NEXT: entry:
3557 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
3558 ; NVPTX-DISABLED1-NEXT: ret void
3560 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
3561 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3562 ; NVPTX-DISABLED2-NEXT: entry:
3563 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
3564 ; NVPTX-DISABLED2-NEXT: ret void
3577 call void @unknown() #11
3578 ret void
3579 }
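; In the do_not_spmdize_task kernel below, the expected worker state machine first compares the
; work function against @__omp_outlined__9_wrapper.ID and runs the wrapper directly on a match,
; with an indirect-call fallback for any other work function (for example one reaching the
; workers through the task enqueued via @__kmpc_omp_task).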
3581 ; Function Attrs: alwaysinline convergent norecurse nounwind
3582 define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
3583 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3584 ; AMDGPU-SAME: () #[[ATTR0]] {
3585 ; AMDGPU-NEXT: entry:
3586 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3587 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3588 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3589 ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3590 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3591 ; AMDGPU: is_worker_check:
3592 ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3593 ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3594 ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3595 ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3596 ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3597 ; AMDGPU: worker_state_machine.begin:
3598 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3599 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3600 ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3601 ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3602 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3603 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3604 ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3605 ; AMDGPU: worker_state_machine.finished:
3606 ; AMDGPU-NEXT: ret void
3607 ; AMDGPU: worker_state_machine.is_active.check:
3608 ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3609 ; AMDGPU: worker_state_machine.parallel_region.check:
3610 ; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3611 ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3612 ; AMDGPU: worker_state_machine.parallel_region.execute:
3613 ; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3614 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3615 ; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
3616 ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3617 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3618 ; AMDGPU: worker_state_machine.parallel_region.end:
3619 ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
3620 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3621 ; AMDGPU: worker_state_machine.done.barrier:
3622 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3623 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3624 ; AMDGPU: thread.user_code.check:
3625 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3626 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3627 ; AMDGPU: common.ret:
3628 ; AMDGPU-NEXT: ret void
3629 ; AMDGPU: user_code.entry:
3630 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3631 ; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3632 ; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3633 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3634 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
3635 ; AMDGPU-NEXT: br label [[COMMON_RET]]
3637 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3638 ; NVPTX-SAME: () #[[ATTR0]] {
3639 ; NVPTX-NEXT: entry:
3640 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3641 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3642 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3643 ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3644 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3645 ; NVPTX: is_worker_check:
3646 ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3647 ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3648 ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3649 ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3650 ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3651 ; NVPTX: worker_state_machine.begin:
3652 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3653 ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3654 ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3655 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3656 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3657 ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3658 ; NVPTX: worker_state_machine.finished:
3659 ; NVPTX-NEXT: ret void
3660 ; NVPTX: worker_state_machine.is_active.check:
3661 ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3662 ; NVPTX: worker_state_machine.parallel_region.check:
3663 ; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3664 ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3665 ; NVPTX: worker_state_machine.parallel_region.execute:
3666 ; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3667 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3668 ; NVPTX: worker_state_machine.parallel_region.fallback.execute:
3669 ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3670 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3671 ; NVPTX: worker_state_machine.parallel_region.end:
3672 ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
3673 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3674 ; NVPTX: worker_state_machine.done.barrier:
3675 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3676 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3677 ; NVPTX: thread.user_code.check:
3678 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3679 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3680 ; NVPTX: common.ret:
3681 ; NVPTX-NEXT: ret void
3682 ; NVPTX: user_code.entry:
3683 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3684 ; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3685 ; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3686 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3687 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
3688 ; NVPTX-NEXT: br label [[COMMON_RET]]
3690 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3691 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
3692 ; AMDGPU-DISABLED1-NEXT: entry:
3693 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3694 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3695 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3696 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3697 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3698 ; AMDGPU-DISABLED1: is_worker_check:
3699 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3700 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3701 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3702 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3703 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3704 ; AMDGPU-DISABLED1: worker_state_machine.begin:
3705 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3706 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3707 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3708 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3709 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3710 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3711 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3712 ; AMDGPU-DISABLED1: worker_state_machine.finished:
3713 ; AMDGPU-DISABLED1-NEXT: ret void
3714 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
3715 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3716 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
3717 ; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3718 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3719 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
3720 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3721 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3722 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
3723 ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3724 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3725 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
3726 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
3727 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3728 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
3729 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3730 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3731 ; AMDGPU-DISABLED1: thread.user_code.check:
3732 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3733 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3734 ; AMDGPU-DISABLED1: common.ret:
3735 ; AMDGPU-DISABLED1-NEXT: ret void
3736 ; AMDGPU-DISABLED1: user_code.entry:
3737 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3738 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3739 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3740 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3741 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
3742 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
3744 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3745 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
3746 ; AMDGPU-DISABLED2-NEXT: entry:
3747 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3748 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3749 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3750 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3751 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3752 ; AMDGPU-DISABLED2: is_worker_check:
3753 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3754 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3755 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3756 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3757 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3758 ; AMDGPU-DISABLED2: worker_state_machine.begin:
3759 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3760 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3761 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3762 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3763 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3764 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3765 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3766 ; AMDGPU-DISABLED2: worker_state_machine.finished:
3767 ; AMDGPU-DISABLED2-NEXT: ret void
3768 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
3769 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3770 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
3771 ; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3772 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3773 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
3774 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3775 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3776 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
3777 ; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3778 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3779 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
3780 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
3781 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3782 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
3783 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3784 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3785 ; AMDGPU-DISABLED2: thread.user_code.check:
3786 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3787 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3788 ; AMDGPU-DISABLED2: common.ret:
3789 ; AMDGPU-DISABLED2-NEXT: ret void
3790 ; AMDGPU-DISABLED2: user_code.entry:
3791 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3792 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3793 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3794 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3795 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
3796 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
3798 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3799 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
3800 ; NVPTX-DISABLED1-NEXT: entry:
3801 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3802 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3803 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3804 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3805 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3806 ; NVPTX-DISABLED1: is_worker_check:
3807 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3808 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3809 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3810 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3811 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3812 ; NVPTX-DISABLED1: worker_state_machine.begin:
3813 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3814 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3815 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3816 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3817 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3818 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3819 ; NVPTX-DISABLED1: worker_state_machine.finished:
3820 ; NVPTX-DISABLED1-NEXT: ret void
3821 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
3822 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3823 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
3824 ; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3825 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3826 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
3827 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3828 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3829 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
3830 ; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3831 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3832 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
3833 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
3834 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3835 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
3836 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3837 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3838 ; NVPTX-DISABLED1: thread.user_code.check:
3839 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3840 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3841 ; NVPTX-DISABLED1: common.ret:
3842 ; NVPTX-DISABLED1-NEXT: ret void
3843 ; NVPTX-DISABLED1: user_code.entry:
3844 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3845 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3846 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3847 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3848 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
3849 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
3851 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3852 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
3853 ; NVPTX-DISABLED2-NEXT: entry:
3854 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3855 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3856 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3857 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3858 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3859 ; NVPTX-DISABLED2: is_worker_check:
3860 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3861 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3862 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3863 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3864 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3865 ; NVPTX-DISABLED2: worker_state_machine.begin:
3866 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3867 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3868 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3869 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3870 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3871 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3872 ; NVPTX-DISABLED2: worker_state_machine.finished:
3873 ; NVPTX-DISABLED2-NEXT: ret void
3874 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
3875 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3876 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
3877 ; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3878 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3879 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
3880 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3881 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3882 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
3883 ; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3884 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3885 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
3886 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
3887 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3888 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
3889 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3890 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3891 ; NVPTX-DISABLED2: thread.user_code.check:
3892 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3893 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3894 ; NVPTX-DISABLED2: common.ret:
3895 ; NVPTX-DISABLED2-NEXT: ret void
3896 ; NVPTX-DISABLED2: user_code.entry:
3897 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3898 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3899 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3900 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3901 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
3902 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
3904 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3905 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
3906 ; AMDGPU-DISABLED-NEXT: entry:
3907 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3908 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3909 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
3910 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3911 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3912 ; AMDGPU-DISABLED: is_worker_check:
3913 ; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3914 ; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3915 ; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3916 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3917 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3918 ; AMDGPU-DISABLED: worker_state_machine.begin:
3919 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3920 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
3921 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
3922 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
3923 ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3924 ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3925 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3926 ; AMDGPU-DISABLED: worker_state_machine.finished:
3927 ; AMDGPU-DISABLED-NEXT: ret void
3928 ; AMDGPU-DISABLED: worker_state_machine.is_active.check:
3929 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3930 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
3931 ; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3932 ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3933 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
3934 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3935 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3936 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
3937 ; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3938 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3939 ; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
3940 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
3941 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3942 ; AMDGPU-DISABLED: worker_state_machine.done.barrier:
3943 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3944 ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3945 ; AMDGPU-DISABLED: thread.user_code.check:
3946 ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3947 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3948 ; AMDGPU-DISABLED: common.ret:
3949 ; AMDGPU-DISABLED-NEXT: ret void
3950 ; AMDGPU-DISABLED: user_code.entry:
3951 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3952 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3953 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3954 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3955 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
3956 ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
3957 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
3958 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
3959 ; NVPTX-DISABLED-NEXT: entry:
3960 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
3961 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3962 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
3963 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
3964 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
3965 ; NVPTX-DISABLED: is_worker_check:
3966 ; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3967 ; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
3968 ; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
3969 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
3970 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
3971 ; NVPTX-DISABLED: worker_state_machine.begin:
3972 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3973 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
3974 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
3975 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
3976 ; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
3977 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
3978 ; NVPTX-DISABLED: worker_state_machine.finished:
3979 ; NVPTX-DISABLED-NEXT: ret void
3980 ; NVPTX-DISABLED: worker_state_machine.is_active.check:
3981 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
3982 ; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
3983 ; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
3984 ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
3985 ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
3986 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
3987 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
3988 ; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
3989 ; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
3990 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3991 ; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
3992 ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
3993 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3994 ; NVPTX-DISABLED: worker_state_machine.done.barrier:
3995 ; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3996 ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3997 ; NVPTX-DISABLED: thread.user_code.check:
3998 ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3999 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
4000 ; NVPTX-DISABLED: common.ret:
4001 ; NVPTX-DISABLED-NEXT: ret void
4002 ; NVPTX-DISABLED: user_code.entry:
4003 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
4004 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
4005 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
4006 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4007 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
4008 ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
4010 %captured_vars_addrs = alloca [0 x ptr], align 8
4011 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
4012 %exec_user_code = icmp eq i32 %0, -1
4013 br i1 %exec_user_code, label %user_code.entry, label %common.ret
4015 common.ret: ; preds = %entry, %user_code.entry
  ret void
4018 user_code.entry: ; preds = %entry
4019 %1 = call i32 @__kmpc_global_thread_num(ptr @1)
4020 %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
4021 %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
4022 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
4023 call void @__kmpc_target_deinit()
4024 br label %common.ret
}
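; The task region of do_not_spmdize_task() is outlined into @.omp_outlined. below; its body only calls @spmd_amenable().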
4027 ; Function Attrs: alwaysinline convergent nounwind
4028 define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 {
4029 ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
4030 ; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4031 ; AMDGPU-NEXT: entry:
4032 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
4033 ; AMDGPU-NEXT: ret void
4035 ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
4036 ; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4037 ; NVPTX-NEXT: entry:
4038 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
4039 ; NVPTX-NEXT: ret void
4041 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
4042 ; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4043 ; AMDGPU-DISABLED1-NEXT: entry:
4044 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
4045 ; AMDGPU-DISABLED1-NEXT: ret void
4047 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
4048 ; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4049 ; AMDGPU-DISABLED2-NEXT: entry:
4050 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
4051 ; AMDGPU-DISABLED2-NEXT: ret void
4053 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
4054 ; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4055 ; NVPTX-DISABLED1-NEXT: entry:
4056 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
4057 ; NVPTX-DISABLED1-NEXT: ret void
4059 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
4060 ; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4061 ; NVPTX-DISABLED2-NEXT: entry:
4062 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
4063 ; NVPTX-DISABLED2-NEXT: ret void
4065 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
4066 ; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4067 ; AMDGPU-DISABLED-NEXT: entry:
4068 ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
4069 ; AMDGPU-DISABLED-NEXT: ret void
4070 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
4071 ; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
4072 ; NVPTX-DISABLED-NEXT: entry:
4073 ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
4074 ; NVPTX-DISABLED-NEXT: ret void
4076 call void @spmd_amenable() #10
  ret void
}
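; Task entry thunk: unpacks the kmp_task_t descriptor and forwards to @.omp_outlined. above.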
4080 ; Function Attrs: convergent norecurse nounwind
4081 define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 {
entry:
4083 %2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2
4084 %3 = load ptr, ptr %1, align 8, !tbaa !30
4085 call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6
  ret i32 0 ; return value assumed; zero is the conventional task-entry result
}
4089 ; Function Attrs: nounwind
4090 declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6
4092 ; Function Attrs: nounwind
4093 declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6
4095 ; Function Attrs: nosync nounwind
4096 declare void @__kmpc_free_shared(ptr nocapture, i64) #8
4098 ; Function Attrs: nofree nosync nounwind
4099 declare ptr @__kmpc_alloc_shared(i64) #7
4101 ; Function Attrs: convergent
4102 declare void @use(ptr nocapture) #5
4104 ; Function Attrs: convergent
4105 declare void @unknown() #2
4106 declare void @unknowni32p(ptr) #2
4108 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
4109 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
4111 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
4112 define weak i32 @__kmpc_target_init(ptr, ptr) {
4113 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
4114 ; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4115 ; AMDGPU-NEXT: ret i32 0
4117 ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
4118 ; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4119 ; NVPTX-NEXT: ret i32 0
4121 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
4122 ; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4123 ; AMDGPU-DISABLED1-NEXT: ret i32 0
4125 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
4126 ; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4127 ; AMDGPU-DISABLED2-NEXT: ret i32 0
4129 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
4130 ; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4131 ; NVPTX-DISABLED1-NEXT: ret i32 0
4133 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
4134 ; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
4135 ; NVPTX-DISABLED2-NEXT: ret i32 0
4137 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
4138 ; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
4139 ; AMDGPU-DISABLED-NEXT: ret i32 0
4140 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
4141 ; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
4142 ; NVPTX-DISABLED-NEXT: ret i32 0
  ret i32 0
}
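; Remaining device runtime declarations referenced by the kernels and outlined functions.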
4146 declare void @__kmpc_get_shared_variables(ptr)
4148 ; Function Attrs: alwaysinline
4149 declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
4151 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
4152 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
4154 ; Function Attrs: convergent
4155 declare void @spmd_amenable() #5
4157 ; Function Attrs: nounwind
4158 declare i32 @__kmpc_global_thread_num(ptr) #6
4160 declare void @__kmpc_target_deinit()
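; Parallel region body of the do_not_spmdize_task kernel, launched via __kmpc_parallel_51; it calls @unknown().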
4163 ; Function Attrs: alwaysinline convergent norecurse nounwind
4164 define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
4165 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
4166 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4167 ; AMDGPU-NEXT: entry:
4168 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
4169 ; AMDGPU-NEXT: ret void
4171 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
4172 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4173 ; NVPTX-NEXT: entry:
4174 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
4175 ; NVPTX-NEXT: ret void
4177 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
4178 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4179 ; AMDGPU-DISABLED1-NEXT: entry:
4180 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
4181 ; AMDGPU-DISABLED1-NEXT: ret void
4183 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
4184 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4185 ; AMDGPU-DISABLED2-NEXT: entry:
4186 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
4187 ; AMDGPU-DISABLED2-NEXT: ret void
4189 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
4190 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4191 ; NVPTX-DISABLED1-NEXT: entry:
4192 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
4193 ; NVPTX-DISABLED1-NEXT: ret void
4195 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
4196 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4197 ; NVPTX-DISABLED2-NEXT: entry:
4198 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
4199 ; NVPTX-DISABLED2-NEXT: ret void
4201 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
4202 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4203 ; AMDGPU-DISABLED-NEXT: entry:
4204 ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
4205 ; AMDGPU-DISABLED-NEXT: ret void
4206 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
4207 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
4208 ; NVPTX-DISABLED-NEXT: entry:
4209 ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
4210 ; NVPTX-DISABLED-NEXT: ret void
entry:
4212 call void @unknown() #11
  ret void
}
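; Wrapper run by generic-mode workers: it retrieves the (empty) shared-variable list via __kmpc_get_shared_variables and then calls @__omp_outlined__9.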
4216 ; Function Attrs: convergent norecurse nounwind
4217 define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
4218 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4219 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4220 ; AMDGPU-NEXT: entry:
4221 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4222 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4223 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4224 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4225 ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4226 ; AMDGPU-NEXT: ret void
4228 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4229 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4230 ; NVPTX-NEXT: entry:
4231 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4232 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4233 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4234 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4235 ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4236 ; NVPTX-NEXT: ret void
4238 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4239 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4240 ; AMDGPU-DISABLED1-NEXT: entry:
4241 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4242 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4243 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4244 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4245 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4246 ; AMDGPU-DISABLED1-NEXT: ret void
4248 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4249 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4250 ; AMDGPU-DISABLED2-NEXT: entry:
4251 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4252 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4253 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4254 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4255 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4256 ; AMDGPU-DISABLED2-NEXT: ret void
4258 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4259 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4260 ; NVPTX-DISABLED1-NEXT: entry:
4261 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4262 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4263 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4264 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4265 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4266 ; NVPTX-DISABLED1-NEXT: ret void
4268 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4269 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4270 ; NVPTX-DISABLED2-NEXT: entry:
4271 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4272 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4273 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4274 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4275 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4276 ; NVPTX-DISABLED2-NEXT: ret void
4278 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4279 ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4280 ; AMDGPU-DISABLED-NEXT: entry:
4281 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4282 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4283 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4284 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4285 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4286 ; AMDGPU-DISABLED-NEXT: ret void
4287 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
4288 ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
4289 ; NVPTX-DISABLED-NEXT: entry:
4290 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
4291 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4292 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4293 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4294 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
4295 ; NVPTX-DISABLED-NEXT: ret void
entry:
4297 %.addr1 = alloca i32, align 4
4298 %.zero.addr = alloca i32, align 4
4299 %global_args = alloca ptr, align 8
4300 store i32 %1, ptr %.addr1, align 4, !tbaa !18
4301 store i32 0, ptr %.zero.addr, align 4
4302 call void @__kmpc_get_shared_variables(ptr %global_args)
4303 call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6
  ret void
}
4307 declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();
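; Attribute groups referenced above; #5 and #10 carry the "llvm.assume"="ompx_spmd_amenable" assumption.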
4309 attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" }
4310 attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
4311 attributes #2 = { convergent }
4312 attributes #3 = { convergent norecurse nounwind }
4313 attributes #4 = { alwaysinline }
4314 attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
4315 attributes #6 = { nounwind }
4316 attributes #7 = { nofree nosync nounwind }
4317 attributes #8 = { nosync nounwind }
4318 attributes #9 = { alwaysinline convergent nounwind }
4319 attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
4320 attributes #11 = { convergent }
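; Module metadata: offload entry table, nvvm.annotations marking each target region as a kernel, module flags, and the TBAA/loop metadata used above.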
4322 !omp_offload.info = !{!0, !1, !2, !3, !4, !5}
4323 !nvvm.annotations = !{!6, !7, !8, !9, !10, !11}
4324 !llvm.module.flags = !{!12, !13, !14, !15, !16}
4325 !llvm.ident = !{!17}
4327 !0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
4328 !1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
4329 !2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
4330 !3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
4331 !4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
4332 !5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
4333 !6 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
4334 !7 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
4335 !8 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
4336 !9 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
4337 !10 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
4338 !11 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
4339 !12 = !{i32 1, !"wchar_size", i32 4}
4340 !13 = !{i32 7, !"openmp", i32 50}
4341 !14 = !{i32 7, !"openmp-device", i32 50}
4342 !15 = !{i32 8, !"PIC Level", i32 2}
4343 !16 = !{i32 7, !"frame-pointer", i32 2}
4344 !17 = !{!"clang version 14.0.0"}
4345 !18 = !{!19, !19, i64 0}
4346 !19 = !{!"int", !20, i64 0}
4347 !20 = !{!"omnipotent char", !21, i64 0}
4348 !21 = !{!"Simple C/C++ TBAA"}
4349 !22 = distinct !{!22, !23, !24}
4350 !23 = !{!"llvm.loop.mustprogress"}
4351 !24 = !{!"llvm.loop.unroll.disable"}
4352 !25 = distinct !{!25, !23, !24}
4353 !26 = !{!27, !27, i64 0}
4354 !27 = !{!"any pointer", !20, i64 0}
4355 !28 = distinct !{!28, !23, !24}
4356 !29 = distinct !{!29, !23, !24}
4357 !30 = !{!31, !27, i64 0}
4358 !31 = !{!"kmp_task_t_with_privates", !32, i64 0}
4359 !32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
4360 ; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
4361 ; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse }
4362 ; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4363 ; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
4364 ; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
4365 ; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind }
4366 ; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
4367 ; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4368 ; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4369 ; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent }
4370 ; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4371 ; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
4372 ; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
4373 ; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
4374 ; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse }
4375 ; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4376 ; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
4377 ; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
4378 ; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind }
4379 ; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
4380 ; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4381 ; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4382 ; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent }
4383 ; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4384 ; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
4385 ; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
4386 ; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
4387 ; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
4388 ; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
4389 ; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
4390 ; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
4391 ; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
4392 ; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
4393 ; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
4394 ; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
4395 ; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
4396 ; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
4397 ; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
4398 ; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4399 ; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4400 ; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4401 ; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
4402 ; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
4403 ; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
4404 ; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
4405 ; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
4406 ; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
4407 ; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
4408 ; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
4409 ; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
4410 ; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
4411 ; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
4412 ; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
4413 ; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
4414 ; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
4415 ; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
4416 ; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
4417 ; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
4418 ; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
4419 ; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
4420 ; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
4421 ; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
4422 ; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
4423 ; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
4424 ; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
4425 ; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
4426 ; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
4427 ; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
4428 ; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4429 ; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4430 ; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4431 ; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
4432 ; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
4433 ; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
4434 ; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
4435 ; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
4436 ; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
4437 ; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
4438 ; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
4439 ; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
4440 ; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
4441 ; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
4442 ; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
4443 ; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
4444 ; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
4445 ; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
4447 ; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4448 ; AMDGPU: attributes #[[ATTR1]] = { norecurse }
4449 ; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4450 ; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4451 ; AMDGPU: attributes #[[ATTR4]] = { nounwind }
4452 ; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4453 ; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4454 ; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4455 ; AMDGPU: attributes #[[ATTR8]] = { convergent }
4456 ; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4457 ; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4458 ; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
4460 ; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4461 ; NVPTX: attributes #[[ATTR1]] = { norecurse }
4462 ; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4463 ; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4464 ; NVPTX: attributes #[[ATTR4]] = { nounwind }
4465 ; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4466 ; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4467 ; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4468 ; NVPTX: attributes #[[ATTR8]] = { convergent }
4469 ; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4470 ; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4471 ; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
4473 ; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4474 ; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
4475 ; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4476 ; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4477 ; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
4478 ; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4479 ; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4480 ; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4481 ; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
4482 ; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4483 ; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4484 ; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
4486 ; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4487 ; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
4488 ; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4489 ; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4490 ; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
4491 ; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4492 ; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4493 ; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4494 ; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
4495 ; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4496 ; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4497 ; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
4499 ; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4500 ; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
4501 ; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4502 ; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4503 ; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
4504 ; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4505 ; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4506 ; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4507 ; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
4508 ; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4509 ; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4510 ; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
4512 ; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
4513 ; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
4514 ; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
4515 ; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
4516 ; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
4517 ; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
4518 ; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
4519 ; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
4520 ; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
4521 ; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
4522 ; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
4523 ; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24}