1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
3 ; RUN: opt -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=CHECK-DISABLED
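; The first RUN line applies the OpenMP optimizer with SPMDization enabled and is
; matched against the default CHECK prefix; the second adds
; -openmp-opt-disable-spmdization and is matched against the CHECK-DISABLED
; prefix, so the two prefixes below contrast the SPMDized output with the
; generic-mode (state-machine) output for the same kernels.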
6 ;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));
8 ;; void sequential_loop() {
9 ;; #pragma omp target teams
11 ;; for (int i = 0; i < 100; ++i) {
12 ;; #pragma omp parallel
21 ;; void use(__attribute__((noescape)) int *) __attribute__((assume("ompx_spmd_amenable")));
23 ;; void sequential_loop_to_stack_var() {
24 ;; #pragma omp target teams
28 ;; for (int i = 0; i < 100; ++i) {
29 ;; #pragma omp parallel
38 ;; void sequential_loop_to_shared_var() {
39 ;; #pragma omp target teams
42 ;; for (int i = 0; i < 100; ++i) {
43 ;; #pragma omp parallel
53 ;; void sequential_loop_to_shared_var_guarded() {
54 ;; #pragma omp target teams
57 ;; for (int i = 0; i < 100; ++i) {
58 ;; #pragma omp parallel
68 ;; void do_not_spmdize_target() {
69 ;; #pragma omp target teams
71 ;; // Incompatible parallel level, called both
72 ;; // from parallel and target regions
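;
; Each sequential_loop* kernel runs a sequential 100-iteration loop that launches
; a parallel region calling unknown(), followed by a call to spmd_amenable(); the
; *_to_stack_var and *_to_shared_var variants additionally allocate an int via
; __kmpc_alloc_shared that is either only passed to use() or captured (and
; incremented) inside the parallel region. The assertions below encode the
; expected outcome: with SPMDization enabled the four sequential_loop* kernels
; have their *_exec_mode globals flipped from i8 1 to i8 2 and call
; __kmpc_target_init with its first i1 set to true, i.e. they become SPMD kernels
; without a worker state machine, while do_not_spmdize_target keeps exec mode 1.
; With SPMDization disabled, every kernel stays at exec mode 1 and instead gets a
; custom worker state machine that dispatches on the private
; @__omp_outlined__*_wrapper.ID constants emitted for that run.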
77 target triple = "nvptx64"
79 %struct.ident_t = type { i32, i32, i32, i32, i8* }
81 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
82 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
83 @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode = weak constant i8 1
84 @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1
85 @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1
86 @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1
87 @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode = weak constant i8 1
88 @llvm.compiler.used = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata"
91 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
92 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
93 ; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
94 ; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
95 ; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
96 ; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
97 ; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
98 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata"
99 ; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
100 ; CHECK: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
102 ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
103 ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
104 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
105 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
106 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
107 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
108 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
109 ; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata"
110 ; CHECK-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
111 ; CHECK-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
112 ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
113 ; CHECK-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
114 ; CHECK-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
115 ; CHECK-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
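; The *_wrapper.ID constants above exist only in the CHECK-DISABLED run: the
; pass keeps each kernel in generic mode but replaces the runtime's generic state
; machine with a specialized one, and the address of each ID constant serves as a
; tag so a worker can recognize the kernel's own parallel regions (compare the
; icmp against @__omp_outlined__1_wrapper.ID in the first kernel below) and fall
; back to an indirect call for any other work function.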
117 define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 {
118 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5
119 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
121 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
122 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
123 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
124 ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
125 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
126 ; CHECK: user_code.entry:
127 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
128 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
129 ; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
130 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
131 ; CHECK-NEXT: ret void
132 ; CHECK: worker.exit:
133 ; CHECK-NEXT: ret void
135 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5
136 ; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
137 ; CHECK-DISABLED-NEXT: entry:
138 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
139 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
140 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
141 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
142 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
143 ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
144 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
145 ; CHECK-DISABLED: worker_state_machine.begin:
146 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
147 ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
148 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
149 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
150 ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
151 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
152 ; CHECK-DISABLED: worker_state_machine.finished:
153 ; CHECK-DISABLED-NEXT: ret void
154 ; CHECK-DISABLED: worker_state_machine.is_active.check:
155 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
156 ; CHECK-DISABLED: worker_state_machine.parallel_region.check:
157 ; CHECK-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
158 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
159 ; CHECK-DISABLED: worker_state_machine.parallel_region.execute:
160 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
161 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
162 ; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute:
163 ; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
164 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
165 ; CHECK-DISABLED: worker_state_machine.parallel_region.end:
166 ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
167 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
168 ; CHECK-DISABLED: worker_state_machine.done.barrier:
169 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
170 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
171 ; CHECK-DISABLED: thread.user_code.check:
172 ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
173 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
174 ; CHECK-DISABLED: user_code.entry:
175 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
176 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
177 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
178 ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
179 ; CHECK-DISABLED-NEXT: ret void
180 ; CHECK-DISABLED: worker.exit:
181 ; CHECK-DISABLED-NEXT: ret void
184 %.zero.addr = alloca i32, align 4
185 %.threadid_temp. = alloca i32, align 4
186 store i32 0, i32* %.zero.addr, align 4
187 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
188 %exec_user_code = icmp eq i32 %0, -1
189 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
191 user_code.entry: ; preds = %entry
192 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
193 store i32 %1, i32* %.threadid_temp., align 4
194 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
195 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
198 worker.exit: ; preds = %entry
202 declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
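; The three i1 flags of __kmpc_target_init are not named in this file; what the
; test relies on is the rewrite itself: the input kernels call it with
; (false, true, true), the SPMDized versions with (true, false, false), and the
; disabled run with (false, false, true), which is consistent with the first flag
; selecting SPMD mode and the second requesting the runtime's generic state
; machine.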
204 define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
205 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
206 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
208 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
209 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
210 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
211 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
212 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
213 ; CHECK-NEXT: store i32 0, i32* [[I]], align 4
214 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
216 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4
217 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100
218 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
220 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
221 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
222 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
223 ; CHECK-NEXT: br label [[FOR_INC:%.*]]
225 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4
226 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
227 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4
228 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
230 ; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]]
231 ; CHECK-NEXT: ret void
233 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
234 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
235 ; CHECK-DISABLED-NEXT: entry:
236 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
237 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
238 ; CHECK-DISABLED-NEXT: [[I:%.*]] = alloca i32, align 4
239 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
240 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
241 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4
242 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]]
243 ; CHECK-DISABLED: for.cond:
244 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4
245 ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100
246 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
247 ; CHECK-DISABLED: for.body:
248 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
249 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
250 ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
251 ; CHECK-DISABLED-NEXT: br label [[FOR_INC:%.*]]
252 ; CHECK-DISABLED: for.inc:
253 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4
254 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
255 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
256 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
257 ; CHECK-DISABLED: for.end:
258 ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]]
259 ; CHECK-DISABLED-NEXT: ret void
262 %.global_tid..addr = alloca i32*, align 8
263 %.bound_tid..addr = alloca i32*, align 8
264 %i = alloca i32, align 4
265 %captured_vars_addrs = alloca [0 x i8*], align 8
266 store i32* %.global_tid., i32** %.global_tid..addr, align 8
267 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
268 store i32 0, i32* %i, align 4
271 for.cond: ; preds = %for.inc, %entry
272 %0 = load i32, i32* %i, align 4
273 %cmp = icmp slt i32 %0, 100
274 br i1 %cmp, label %for.body, label %for.end
276 for.body: ; preds = %for.cond
277 %1 = load i32*, i32** %.global_tid..addr, align 8
278 %2 = load i32, i32* %1, align 4
279 %3 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
280 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %2, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %3, i64 0)
283 for.inc: ; preds = %for.body
284 %4 = load i32, i32* %i, align 4
285 %inc = add nsw i32 %4, 1
286 store i32 %inc, i32* %i, align 4
287 br label %for.cond, !llvm.loop !13
289 for.end: ; preds = %for.cond
290 call void @spmd_amenable() #4
294 define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
295 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
296 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
298 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
299 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
300 ; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
301 ; CHECK-NEXT: ret void
303 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
304 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
305 ; CHECK-DISABLED-NEXT: entry:
306 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
307 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
308 ; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
309 ; CHECK-DISABLED-NEXT: ret void
312 %.global_tid..addr = alloca i32*, align 8
313 %.bound_tid..addr = alloca i32*, align 8
314 store i32* %.global_tid., i32** %.global_tid..addr, align 8
315 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
316 call void @unknown() #5
320 declare void @unknown() #1
322 define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #0 {
323 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
324 ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
326 ; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
327 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
328 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
329 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
330 ; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
331 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
332 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
333 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
334 ; CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
335 ; CHECK-NEXT: ret void
337 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
338 ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
339 ; CHECK-DISABLED-NEXT: entry:
340 ; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
341 ; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
342 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
343 ; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
344 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
345 ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
346 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
347 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
348 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
349 ; CHECK-DISABLED-NEXT: ret void
352 %.addr = alloca i16, align 2
353 %.addr1 = alloca i32, align 4
354 %.zero.addr = alloca i32, align 4
355 %global_args = alloca i8**, align 8
356 store i32 0, i32* %.zero.addr, align 4
357 store i16 %0, i16* %.addr, align 2
358 store i32 %1, i32* %.addr1, align 4
359 call void @__kmpc_get_shared_variables(i8*** %global_args)
360 call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #3
364 declare void @__kmpc_get_shared_variables(i8***)
366 declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
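; As used above, the operands of __kmpc_parallel_51 are the ident, the thread id
; loaded from the outlined function's first argument, a constant 1, two -1
; placeholders, the outlined parallel region, the wrapper used to launch it from
; workers (or, in the CHECK-DISABLED run, the matching *_wrapper.ID tag), the
; captured-variable array, and the number of captured variables (0 or 1 here);
; the parameter names themselves are defined by the device runtime, not in this
; file.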
368 declare void @spmd_amenable() #2
370 declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
372 declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
374 define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() #0 {
375 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20
376 ; CHECK-SAME: () #[[ATTR0]] {
378 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
379 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
380 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
381 ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
382 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
383 ; CHECK: user_code.entry:
384 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
385 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
386 ; CHECK-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
387 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
388 ; CHECK-NEXT: ret void
389 ; CHECK: worker.exit:
390 ; CHECK-NEXT: ret void
392 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20
393 ; CHECK-DISABLED-SAME: () #[[ATTR0]] {
394 ; CHECK-DISABLED-NEXT: entry:
395 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
396 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
397 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
398 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
399 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
400 ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
401 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
402 ; CHECK-DISABLED: worker_state_machine.begin:
403 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
404 ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
405 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
406 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
407 ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
408 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
409 ; CHECK-DISABLED: worker_state_machine.finished:
410 ; CHECK-DISABLED-NEXT: ret void
411 ; CHECK-DISABLED: worker_state_machine.is_active.check:
412 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
413 ; CHECK-DISABLED: worker_state_machine.parallel_region.check:
414 ; CHECK-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*)
415 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
416 ; CHECK-DISABLED: worker_state_machine.parallel_region.execute:
417 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
418 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
419 ; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute:
420 ; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
421 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
422 ; CHECK-DISABLED: worker_state_machine.parallel_region.end:
423 ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
424 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
425 ; CHECK-DISABLED: worker_state_machine.done.barrier:
426 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
427 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
428 ; CHECK-DISABLED: thread.user_code.check:
429 ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
430 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
431 ; CHECK-DISABLED: user_code.entry:
432 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
433 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
434 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
435 ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
436 ; CHECK-DISABLED-NEXT: ret void
437 ; CHECK-DISABLED: worker.exit:
438 ; CHECK-DISABLED-NEXT: ret void
441 %.zero.addr = alloca i32, align 4
442 %.threadid_temp. = alloca i32, align 4
443 store i32 0, i32* %.zero.addr, align 4
444 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
445 %exec_user_code = icmp eq i32 %0, -1
446 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
448 user_code.entry: ; preds = %entry
449 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
450 store i32 %1, i32* %.threadid_temp., align 4
451 call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #3
452 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
455 worker.exit: ; preds = %entry
459 define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
460 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
461 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
463 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
464 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
465 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
466 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
467 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
468 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
469 ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
470 ; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]]
471 ; CHECK-NEXT: store i32 0, i32* [[I]], align 4
472 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
474 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4
475 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
476 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
478 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
479 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
480 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
481 ; CHECK-NEXT: br label [[FOR_INC:%.*]]
483 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4
484 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
485 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4
486 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
488 ; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]]
489 ; CHECK-NEXT: ret void
491 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
492 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
493 ; CHECK-DISABLED-NEXT: entry:
494 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
495 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
496 ; CHECK-DISABLED-NEXT: [[I:%.*]] = alloca i32, align 4
497 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
498 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
499 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
500 ; CHECK-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
501 ; CHECK-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]]
502 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4
503 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]]
504 ; CHECK-DISABLED: for.cond:
505 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4
506 ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
507 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
508 ; CHECK-DISABLED: for.body:
509 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
510 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
511 ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
512 ; CHECK-DISABLED-NEXT: br label [[FOR_INC:%.*]]
513 ; CHECK-DISABLED: for.inc:
514 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4
515 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
516 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
517 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
518 ; CHECK-DISABLED: for.end:
519 ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]]
520 ; CHECK-DISABLED-NEXT: ret void
523 %.global_tid..addr = alloca i32*, align 8
524 %.bound_tid..addr = alloca i32*, align 8
525 %i = alloca i32, align 4
526 %captured_vars_addrs = alloca [0 x i8*], align 8
527 store i32* %.global_tid., i32** %.global_tid..addr, align 8
528 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
529 %x = call i8* @__kmpc_alloc_shared(i64 4)
530 %x_on_stack = bitcast i8* %x to i32*
531 call void @use(i32* nocapture %x_on_stack) #4
532 store i32 0, i32* %i, align 4
535 for.cond: ; preds = %for.inc, %entry
536 %0 = load i32, i32* %i, align 4
537 %cmp = icmp slt i32 %0, 100
538 br i1 %cmp, label %for.body, label %for.end
540 for.body: ; preds = %for.cond
541 %1 = load i32*, i32** %.global_tid..addr, align 8
542 %2 = load i32, i32* %1, align 4
543 %3 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
544 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %2, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0)
547 for.inc: ; preds = %for.body
548 %4 = load i32, i32* %i, align 4
549 %inc = add nsw i32 %4, 1
550 store i32 %inc, i32* %i, align 4
551 br label %for.cond, !llvm.loop !15
553 for.end: ; preds = %for.cond
554 call void @spmd_amenable() #4
555 call void @__kmpc_free_shared(i8* %x, i64 4)
559 declare i8* @__kmpc_alloc_shared(i64) #3
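; Both runs also rewrite the __kmpc_alloc_shared/__kmpc_free_shared pairs: the
; 4-byte allocation in __omp_outlined__2, whose only user is the nocapture @use
; call, is lowered to a private alloca (X_ON_STACK above), while the allocation
; in __omp_outlined__4 that is captured by a parallel region becomes an internal
; addrspace(3) global (@x here, with a second shared global checked near the top
; for the guarded kernel).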
561 declare void @use(i32* nocapture) #2
563 define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
564 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3
565 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
567 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
568 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
569 ; CHECK-NEXT: call void @unknown() #[[ATTR6]]
570 ; CHECK-NEXT: ret void
572 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
573 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
574 ; CHECK-DISABLED-NEXT: entry:
575 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
576 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
577 ; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]]
578 ; CHECK-DISABLED-NEXT: ret void
581 %.global_tid..addr = alloca i32*, align 8
582 %.bound_tid..addr = alloca i32*, align 8
583 store i32* %.global_tid., i32** %.global_tid..addr, align 8
584 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
585 call void @unknown() #5
589 define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
590 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
591 ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
593 ; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
594 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
595 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
596 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
597 ; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
598 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
599 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
600 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
601 ; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
602 ; CHECK-NEXT: ret void
604 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
605 ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
606 ; CHECK-DISABLED-NEXT: entry:
607 ; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
608 ; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
609 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
610 ; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
611 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
612 ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
613 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
614 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
615 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
616 ; CHECK-DISABLED-NEXT: ret void
619 %.addr = alloca i16, align 2
620 %.addr1 = alloca i32, align 4
621 %.zero.addr = alloca i32, align 4
622 %global_args = alloca i8**, align 8
623 store i32 0, i32* %.zero.addr, align 4
624 store i16 %0, i16* %.addr, align 2
625 store i32 %1, i32* %.addr1, align 4
626 call void @__kmpc_get_shared_variables(i8*** %global_args)
627 call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3
631 declare void @__kmpc_free_shared(i8* nocapture, i64) #3
633 define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35() #0 {
634 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35
635 ; CHECK-SAME: () #[[ATTR0]] {
637 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
638 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
639 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
640 ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
641 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
642 ; CHECK: user_code.entry:
643 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
644 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
645 ; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
646 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
647 ; CHECK-NEXT: ret void
648 ; CHECK: worker.exit:
649 ; CHECK-NEXT: ret void
651 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35
652 ; CHECK-DISABLED-SAME: () #[[ATTR0]] {
653 ; CHECK-DISABLED-NEXT: entry:
654 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
655 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
656 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
657 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
658 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
659 ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
660 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
661 ; CHECK-DISABLED: worker_state_machine.begin:
662 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
663 ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
664 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
665 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
666 ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
667 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
668 ; CHECK-DISABLED: worker_state_machine.finished:
669 ; CHECK-DISABLED-NEXT: ret void
670 ; CHECK-DISABLED: worker_state_machine.is_active.check:
671 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
672 ; CHECK-DISABLED: worker_state_machine.parallel_region.check:
673 ; CHECK-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
674 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
675 ; CHECK-DISABLED: worker_state_machine.parallel_region.execute:
676 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
677 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
678 ; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute:
679 ; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
680 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
681 ; CHECK-DISABLED: worker_state_machine.parallel_region.end:
682 ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
683 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
684 ; CHECK-DISABLED: worker_state_machine.done.barrier:
685 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
686 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
687 ; CHECK-DISABLED: thread.user_code.check:
688 ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
689 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
690 ; CHECK-DISABLED: user_code.entry:
691 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
692 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
693 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
694 ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
695 ; CHECK-DISABLED-NEXT: ret void
696 ; CHECK-DISABLED: worker.exit:
697 ; CHECK-DISABLED-NEXT: ret void
700 %.zero.addr = alloca i32, align 4
701 %.threadid_temp. = alloca i32, align 4
702 store i32 0, i32* %.zero.addr, align 4
703 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
704 %exec_user_code = icmp eq i32 %0, -1
705 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
707 user_code.entry: ; preds = %entry
708 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
709 store i32 %1, i32* %.threadid_temp., align 4
710 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
711 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
714 worker.exit: ; preds = %entry
718 define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
719 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__4
720 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
722 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
723 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
724 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
725 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
726 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
727 ; CHECK-NEXT: store i32 0, i32* [[I]], align 4
728 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
730 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4
731 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100
732 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
734 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
735 ; CHECK-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP1]], align 8
736 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
737 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
738 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 1)
739 ; CHECK-NEXT: br label [[FOR_INC:%.*]]
741 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4
742 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
743 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4
744 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
746 ; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]]
747 ; CHECK-NEXT: ret void
749 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
750 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
751 ; CHECK-DISABLED-NEXT: entry:
752 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
753 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
754 ; CHECK-DISABLED-NEXT: [[I:%.*]] = alloca i32, align 4
755 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
756 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
757 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4
758 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]]
759 ; CHECK-DISABLED: for.cond:
760 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4
761 ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100
762 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
763 ; CHECK-DISABLED: for.body:
764 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
765 ; CHECK-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP1]], align 8
766 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
767 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
768 ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 1)
769 ; CHECK-DISABLED-NEXT: br label [[FOR_INC:%.*]]
770 ; CHECK-DISABLED: for.inc:
771 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4
772 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
773 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
774 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
775 ; CHECK-DISABLED: for.end:
776 ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]]
777 ; CHECK-DISABLED-NEXT: ret void
780 %.global_tid..addr = alloca i32*, align 8
781 %.bound_tid..addr = alloca i32*, align 8
782 %i = alloca i32, align 4
783 %captured_vars_addrs = alloca [1 x i8*], align 8
784 store i32* %.global_tid., i32** %.global_tid..addr, align 8
785 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
786 %x = call i8* @__kmpc_alloc_shared(i64 4)
787 %x_on_stack = bitcast i8* %x to i32*
788 store i32 0, i32* %i, align 4
791 for.cond: ; preds = %for.inc, %entry
792 %0 = load i32, i32* %i, align 4
793 %cmp = icmp slt i32 %0, 100
794 br i1 %cmp, label %for.body, label %for.end
796 for.body: ; preds = %for.cond
797 %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
798 %2 = bitcast i32* %x_on_stack to i8*
799 store i8* %2, i8** %1, align 8
800 %3 = load i32*, i32** %.global_tid..addr, align 8
801 %4 = load i32, i32* %3, align 4
802 %5 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
803 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %4, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %5, i64 1)
806 for.inc: ; preds = %for.body
807 %6 = load i32, i32* %i, align 4
808 %inc = add nsw i32 %6, 1
809 store i32 %inc, i32* %i, align 4
810 br label %for.cond, !llvm.loop !16
812 for.end: ; preds = %for.cond
813 call void @spmd_amenable() #4
814 call void @__kmpc_free_shared(i8* %x, i64 4)
818 define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
819 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5
820 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
822 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
823 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
824 ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8
825 ; CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8
826 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4
827 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
828 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4
829 ; CHECK-NEXT: call void @unknown() #[[ATTR6]]
830 ; CHECK-NEXT: ret void
832 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
833 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
834 ; CHECK-DISABLED-NEXT: entry:
835 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
836 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
837 ; CHECK-DISABLED-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8
838 ; CHECK-DISABLED-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8
839 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4
840 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
841 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4
842 ; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]]
843 ; CHECK-DISABLED-NEXT: ret void
846 %.global_tid..addr = alloca i32*, align 8
847 %.bound_tid..addr = alloca i32*, align 8
848 %x.addr = alloca i32*, align 8
849 store i32* %.global_tid., i32** %.global_tid..addr, align 8
850 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
851 store i32* %x, i32** %x.addr, align 8
852 %0 = load i32*, i32** %x.addr, align 8
853 %1 = load i32, i32* %0, align 4
854 %inc = add nsw i32 %1, 1
855 store i32 %inc, i32* %0, align 4
856 call void @unknown() #5
860 define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
861 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
862 ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
864 ; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
865 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
866 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
867 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
868 ; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
869 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
870 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
871 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
872 ; CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
873 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
874 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
875 ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
876 ; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]]
877 ; CHECK-NEXT: ret void
879 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
880 ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
881 ; CHECK-DISABLED-NEXT: entry:
882 ; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
883 ; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
884 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
885 ; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
886 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
887 ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
888 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
889 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
890 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
891 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
892 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
893 ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
894 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]]
895 ; CHECK-DISABLED-NEXT: ret void
898 %.addr = alloca i16, align 2
899 %.addr1 = alloca i32, align 4
900 %.zero.addr = alloca i32, align 4
901 %global_args = alloca i8**, align 8
902 store i32 0, i32* %.zero.addr, align 4
903 store i16 %0, i16* %.addr, align 2
904 store i32 %1, i32* %.addr1, align 4
905 call void @__kmpc_get_shared_variables(i8*** %global_args)
906 %2 = load i8**, i8*** %global_args, align 8
907 %3 = getelementptr inbounds i8*, i8** %2, i64 0
908 %4 = bitcast i8** %3 to i32**
909 %5 = load i32*, i32** %4, align 8
910 call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %5) #3
914 define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50() #0 {
915 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50
916 ; CHECK-SAME: () #[[ATTR0]] {
918 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
919 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
920 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
921 ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
922 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
923 ; CHECK: user_code.entry:
924 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
925 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
926 ; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
927 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
928 ; CHECK-NEXT: ret void
929 ; CHECK: worker.exit:
930 ; CHECK-NEXT: ret void
932 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50
933 ; CHECK-DISABLED-SAME: () #[[ATTR0]] {
934 ; CHECK-DISABLED-NEXT: entry:
935 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
936 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
937 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
938 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
939 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
940 ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
941 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
942 ; CHECK-DISABLED: worker_state_machine.begin:
943 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
944 ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
945 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
946 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
947 ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
948 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
949 ; CHECK-DISABLED: worker_state_machine.finished:
950 ; CHECK-DISABLED-NEXT: ret void
951 ; CHECK-DISABLED: worker_state_machine.is_active.check:
952 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
953 ; CHECK-DISABLED: worker_state_machine.parallel_region.check:
954 ; CHECK-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
955 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
956 ; CHECK-DISABLED: worker_state_machine.parallel_region.execute:
957 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
958 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
959 ; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute:
960 ; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
961 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
962 ; CHECK-DISABLED: worker_state_machine.parallel_region.end:
963 ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
964 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
965 ; CHECK-DISABLED: worker_state_machine.done.barrier:
966 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
967 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
968 ; CHECK-DISABLED: thread.user_code.check:
969 ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
970 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
971 ; CHECK-DISABLED: user_code.entry:
972 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
973 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
974 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
975 ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
976 ; CHECK-DISABLED-NEXT: ret void
977 ; CHECK-DISABLED: worker.exit:
978 ; CHECK-DISABLED-NEXT: ret void
979 ;
980 entry:
981 %.zero.addr = alloca i32, align 4
982 %.threadid_temp. = alloca i32, align 4
983 store i32 0, i32* %.zero.addr, align 4
984 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
985 %exec_user_code = icmp eq i32 %0, -1
986 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
988 user_code.entry: ; preds = %entry
989 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
990 store i32 %1, i32* %.threadid_temp., align 4
991 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
992 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
993 ret void
995 worker.exit: ; preds = %entry
996 ret void
997 }
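; Team-region body for the guarded case. As the checks below show, SPMDization
; guards the store of 42 to the shared x behind a thread-id check
; (region.check.tid/region.guarded) followed by a simple SPMD barrier, while the
; disabled run stores unconditionally.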
999 define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
1000 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6
1001 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1002 ; CHECK-NEXT: entry:
1003 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1004 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1005 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
1006 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
1007 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1008 ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
1009 ; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
1010 ; CHECK: region.check.tid:
1011 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
1012 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
1013 ; CHECK-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
1014 ; CHECK: region.guarded:
1015 ; CHECK-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4
1016 ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
1017 ; CHECK: region.guarded.end:
1018 ; CHECK-NEXT: br label [[REGION_BARRIER]]
1019 ; CHECK: region.barrier:
1020 ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
1021 ; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
1022 ; CHECK: region.exit:
1023 ; CHECK-NEXT: store i32 0, i32* [[I]], align 4
1024 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
1025 ; CHECK: for.cond:
1026 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4
1027 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 100
1028 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
1029 ; CHECK: for.body:
1030 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1031 ; CHECK-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP3]], align 8
1032 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
1033 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1034 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP4]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP5]], i64 noundef 1)
1035 ; CHECK-NEXT: br label [[FOR_INC:%.*]]
1036 ; CHECK: for.inc:
1037 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4
1038 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
1039 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4
1040 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
1041 ; CHECK: for.end:
1042 ; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]]
1043 ; CHECK-NEXT: ret void
1045 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
1046 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1047 ; CHECK-DISABLED-NEXT: entry:
1048 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1049 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1050 ; CHECK-DISABLED-NEXT: [[I:%.*]] = alloca i32, align 4
1051 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
1052 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1053 ; CHECK-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
1054 ; CHECK-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4
1055 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4
1056 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]]
1057 ; CHECK-DISABLED: for.cond:
1058 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4
1059 ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100
1060 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
1061 ; CHECK-DISABLED: for.body:
1062 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1063 ; CHECK-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP1]], align 8
1064 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
1065 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1066 ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 1)
1067 ; CHECK-DISABLED-NEXT: br label [[FOR_INC:%.*]]
1068 ; CHECK-DISABLED: for.inc:
1069 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4
1070 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
1071 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
1072 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
1073 ; CHECK-DISABLED: for.end:
1074 ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]]
1075 ; CHECK-DISABLED-NEXT: ret void
1076 ;
1077 entry:
1078 %.global_tid..addr = alloca i32*, align 8
1079 %.bound_tid..addr = alloca i32*, align 8
1080 %i = alloca i32, align 4
1081 %captured_vars_addrs = alloca [1 x i8*], align 8
1082 store i32* %.global_tid., i32** %.global_tid..addr, align 8
1083 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
1084 %x = call i8* @__kmpc_alloc_shared(i64 4)
1085 %x_on_stack = bitcast i8* %x to i32*
1086 store i32 42, i32* %x_on_stack, align 4
1087 store i32 0, i32* %i, align 4
1088 br label %for.cond
1090 for.cond: ; preds = %for.inc, %entry
1091 %0 = load i32, i32* %i, align 4
1092 %cmp = icmp slt i32 %0, 100
1093 br i1 %cmp, label %for.body, label %for.end
1095 for.body: ; preds = %for.cond
1096 %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
1097 %2 = bitcast i32* %x_on_stack to i8*
1098 store i8* %2, i8** %1, align 8
1099 %3 = load i32*, i32** %.global_tid..addr, align 8
1100 %4 = load i32, i32* %3, align 4
1101 %5 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
1102 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %4, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %5, i64 1)
1103 br label %for.inc
1105 for.inc: ; preds = %for.body
1106 %6 = load i32, i32* %i, align 4
1107 %inc = add nsw i32 %6, 1
1108 store i32 %inc, i32* %i, align 4
1109 br label %for.cond, !llvm.loop !17
1111 for.end: ; preds = %for.cond
1112 call void @spmd_amenable() #4
1113 call void @__kmpc_free_shared(i8* %x, i64 4)
1114 ret void
1115 }
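; Parallel-region body for the guarded case: increments the shared x and calls
; @unknown().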
1117 define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
1118 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7
1119 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
1120 ; CHECK-NEXT: entry:
1121 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1122 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1123 ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8
1124 ; CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8
1125 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4
1126 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1127 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4
1128 ; CHECK-NEXT: call void @unknown() #[[ATTR6]]
1129 ; CHECK-NEXT: ret void
1131 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
1132 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
1133 ; CHECK-DISABLED-NEXT: entry:
1134 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1135 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1136 ; CHECK-DISABLED-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8
1137 ; CHECK-DISABLED-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8
1138 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4
1139 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1140 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4
1141 ; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]]
1142 ; CHECK-DISABLED-NEXT: ret void
1143 ;
1144 entry:
1145 %.global_tid..addr = alloca i32*, align 8
1146 %.bound_tid..addr = alloca i32*, align 8
1147 %x.addr = alloca i32*, align 8
1148 store i32* %.global_tid., i32** %.global_tid..addr, align 8
1149 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
1150 store i32* %x, i32** %x.addr, align 8
1151 %0 = load i32*, i32** %x.addr, align 8
1152 %1 = load i32, i32* %0, align 4
1153 %inc = add nsw i32 %1, 1
1154 store i32 %inc, i32* %0, align 4
1155 call void @unknown() #5
1156 ret void
1157 }
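; Wrapper that unpacks the shared-variable pointer via
; __kmpc_get_shared_variables before forwarding to __omp_outlined__7.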
1159 define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
1160 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
1161 ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1162 ; CHECK-NEXT: entry:
1163 ; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1164 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1165 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1166 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
1167 ; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1168 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
1169 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
1170 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
1171 ; CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
1172 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
1173 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
1174 ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
1175 ; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]]
1176 ; CHECK-NEXT: ret void
1178 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
1179 ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1180 ; CHECK-DISABLED-NEXT: entry:
1181 ; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
1182 ; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1183 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1184 ; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
1185 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1186 ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
1187 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
1188 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
1189 ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
1190 ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
1191 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
1192 ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
1193 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]]
1194 ; CHECK-DISABLED-NEXT: ret void
1195 ;
1196 entry:
1197 %.addr = alloca i16, align 2
1198 %.addr1 = alloca i32, align 4
1199 %.zero.addr = alloca i32, align 4
1200 %global_args = alloca i8**, align 8
1201 store i32 0, i32* %.zero.addr, align 4
1202 store i16 %0, i16* %.addr, align 2
1203 store i32 %1, i32* %.addr1, align 4
1204 call void @__kmpc_get_shared_variables(i8*** %global_args)
1205 %2 = load i8**, i8*** %global_args, align 8
1206 %3 = getelementptr inbounds i8*, i8** %2, i64 0
1207 %4 = bitcast i8** %3 to i32**
1208 %5 = load i32*, i32** %4, align 8
1209 call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %5) #3
1210 ret void
1211 }
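; Kernel for do_not_spmdize_target: stays in generic mode in both runs,
; presumably because the target region calls @unknown(), which carries no
; spmd_amenable assumption; only a fallback worker state machine is emitted.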
1213 define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 {
1214 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65
1215 ; CHECK-SAME: () #[[ATTR0]] {
1216 ; CHECK-NEXT: entry:
1217 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
1218 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1219 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1220 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
1221 ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1222 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1223 ; CHECK: worker_state_machine.begin:
1224 ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
1225 ; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
1226 ; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
1227 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
1228 ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
1229 ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1230 ; CHECK: worker_state_machine.finished:
1231 ; CHECK-NEXT: ret void
1232 ; CHECK: worker_state_machine.is_active.check:
1233 ; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1234 ; CHECK: worker_state_machine.parallel_region.fallback.execute:
1235 ; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
1236 ; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1237 ; CHECK: worker_state_machine.parallel_region.end:
1238 ; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
1239 ; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1240 ; CHECK: worker_state_machine.done.barrier:
1241 ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
1242 ; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1243 ; CHECK: thread.user_code.check:
1244 ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1245 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1246 ; CHECK: user_code.entry:
1247 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
1248 ; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
1249 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
1250 ; CHECK-NEXT: ret void
1251 ; CHECK: worker.exit:
1252 ; CHECK-NEXT: ret void
1254 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65
1255 ; CHECK-DISABLED-SAME: () #[[ATTR0]] {
1256 ; CHECK-DISABLED-NEXT: entry:
1257 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
1258 ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1259 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1260 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1261 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
1262 ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1263 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1264 ; CHECK-DISABLED: worker_state_machine.begin:
1265 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
1266 ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
1267 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
1268 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
1269 ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
1270 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1271 ; CHECK-DISABLED: worker_state_machine.finished:
1272 ; CHECK-DISABLED-NEXT: ret void
1273 ; CHECK-DISABLED: worker_state_machine.is_active.check:
1274 ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1275 ; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute:
1276 ; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
1277 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1278 ; CHECK-DISABLED: worker_state_machine.parallel_region.end:
1279 ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
1280 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1281 ; CHECK-DISABLED: worker_state_machine.done.barrier:
1282 ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
1283 ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1284 ; CHECK-DISABLED: thread.user_code.check:
1285 ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1286 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1287 ; CHECK-DISABLED: user_code.entry:
1288 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
1289 ; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
1290 ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
1291 ; CHECK-DISABLED-NEXT: ret void
1292 ; CHECK-DISABLED: worker.exit:
1293 ; CHECK-DISABLED-NEXT: ret void
1294 ;
1295 entry:
1296 %.zero.addr = alloca i32, align 4
1297 %.threadid_temp. = alloca i32, align 4
1298 store i32 0, i32* %.zero.addr, align 4
1299 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
1300 %exec_user_code = icmp eq i32 %0, -1
1301 br i1 %exec_user_code, label %user_code.entry, label %worker.exit
1303 user_code.entry: ; preds = %entry
1304 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
1305 store i32 %1, i32* %.threadid_temp., align 4
1306 call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #3
1307 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
1308 ret void
1310 worker.exit: ; preds = %entry
1311 ret void
1312 }
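; Target-region body for do_not_spmdize_target: a direct call to @unknown().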
1314 define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
1315 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8
1316 ; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1317 ; CHECK-NEXT: entry:
1318 ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1319 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1320 ; CHECK-NEXT: call void @unknown() #[[ATTR6]]
1321 ; CHECK-NEXT: ret void
1323 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
1324 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1325 ; CHECK-DISABLED-NEXT: entry:
1326 ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1327 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1328 ; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]]
1329 ; CHECK-DISABLED-NEXT: ret void
1330 ;
1331 entry:
1332 %.global_tid..addr = alloca i32*, align 8
1333 %.bound_tid..addr = alloca i32*, align 8
1334 store i32* %.global_tid., i32** %.global_tid..addr, align 8
1335 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
1336 call void @unknown() #5
1337 ret void
1338 }
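; Note: attributes #2/#4 carry the "ompx_spmd_amenable" assumption consulted by
; the SPMDization analysis, while plain convergent call sites (#5) do not.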
1340 attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1341 attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1342 attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1343 attributes #3 = { nounwind }
1344 attributes #4 = { convergent "llvm.assume"="ompx_spmd_amenable" }
1345 attributes #5 = { convergent }
1347 !omp_offload.info = !{!0, !1, !2, !3, !4}
1348 !nvvm.annotations = !{!5, !6, !7, !8, !9}
1349 !llvm.module.flags = !{!10, !11, !12}
1351 !0 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1}
1352 !1 = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}
1353 !2 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2}
1354 !3 = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4}
1355 !4 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
1356 !5 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1}
1357 !6 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
1358 !7 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
1359 !8 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
1360 !9 = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1}
1361 !10 = !{i32 1, !"wchar_size", i32 4}
1362 !11 = !{i32 7, !"openmp", i32 50}
1363 !12 = !{i32 7, !"openmp-device", i32 50}
1364 !13 = distinct !{!13, !14}
1365 !14 = !{!"llvm.loop.mustprogress"}
1366 !15 = distinct !{!15, !14}
1367 !16 = distinct !{!16, !14}
1368 !17 = distinct !{!17, !14}
1370 ; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1371 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1372 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1373 ; CHECK: attributes #[[ATTR3]] = { nounwind }
1374 ; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind }
1375 ; CHECK: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
1376 ; CHECK: attributes #[[ATTR6]] = { convergent }
1378 ; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1379 ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1380 ; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
1381 ; CHECK-DISABLED: attributes #[[ATTR3]] = { nounwind }
1382 ; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind }
1383 ; CHECK-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
1384 ; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent }
1386 ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1}
1387 ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}
1388 ; CHECK: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2}
1389 ; CHECK: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4}
1390 ; CHECK: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
1391 ; CHECK: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1}
1392 ; CHECK: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
1393 ; CHECK: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
1394 ; CHECK: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
1395 ; CHECK: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1}
1396 ; CHECK: [[META10:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
1397 ; CHECK: [[META11:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1398 ; CHECK: [[META12:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1399 ; CHECK: [[LOOP13]] = distinct !{!13, !14}
1400 ; CHECK: [[META14:![0-9]+]] = !{!"llvm.loop.mustprogress"}
1401 ; CHECK: [[LOOP15]] = distinct !{!15, !14}
1402 ; CHECK: [[LOOP16]] = distinct !{!16, !14}
1403 ; CHECK: [[LOOP17]] = distinct !{!17, !14}
1405 ; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1}
1406 ; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}
1407 ; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2}
1408 ; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4}
1409 ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
1410 ; CHECK-DISABLED: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1}
1411 ; CHECK-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
1412 ; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
1413 ; CHECK-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
1414 ; CHECK-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1}
1415 ; CHECK-DISABLED: [[META10:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
1416 ; CHECK-DISABLED: [[META11:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1417 ; CHECK-DISABLED: [[META12:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1418 ; CHECK-DISABLED: [[LOOP13]] = distinct !{!13, !14}
1419 ; CHECK-DISABLED: [[META14:![0-9]+]] = !{!"llvm.loop.mustprogress"}
1420 ; CHECK-DISABLED: [[LOOP15]] = distinct !{!15, !14}
1421 ; CHECK-DISABLED: [[LOOP16]] = distinct !{!16, !14}
1422 ; CHECK-DISABLED: [[LOOP17]] = distinct !{!17, !14}