1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
2 ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
3 ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
5 target triple = "amdgcn-amd-amdhsa"
; Struct types used by the @kernel_kernel_environment constant below.
7 %struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
8 %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
; Internal, undef-initialized addrspace(3) globals that the test kernels
; store to and load from (e.g. @G and @H in @kernel, @X in @kernel2/@kernel3).
10 @G = internal addrspace(3) global i32 undef, align 4
11 @H = internal addrspace(3) global i32 undef, align 4
12 @X = internal addrspace(3) global i32 undef, align 4
13 @QA1 = internal addrspace(3) global i32 undef, align 4
14 @QB1 = internal addrspace(3) global i32 undef, align 4
15 @QC1 = internal addrspace(3) global i32 undef, align 4
16 @QD1 = internal addrspace(3) global i32 undef, align 4
17 @QA2 = internal addrspace(3) global i32 undef, align 4
18 @QB2 = internal addrspace(3) global i32 undef, align 4
19 @QC2 = internal addrspace(3) global i32 undef, align 4
20 @QD2 = internal addrspace(3) global i32 undef, align 4
21 @QA3 = internal addrspace(3) global i32 undef, align 4
22 @QB3 = internal addrspace(3) global i32 undef, align 4
23 @QC3 = internal addrspace(3) global i32 undef, align 4
24 @QD3 = internal addrspace(3) global i32 undef, align 4
25 @UAA1 = internal addrspace(3) global i32 undef, align 4
26 @UAA2 = internal addrspace(3) global i32 undef, align 4
27 @UAA3 = internal addrspace(3) global i32 undef, align 4
28 @UANA1 = internal addrspace(3) global i32 undef, align 4
; One-byte addrspace(4) constant whose address is compared against null in
; @test_assume.
29 @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
; Kernel environment passed to @__kmpc_target_init in @kernel.
30 @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
32 ; Make sure we do not delete the stores to @G without also replacing the load with `1`.
34 ; CHECK: @G = internal addrspace(3) global i32 undef, align 4
35 ; CHECK: @H = internal addrspace(3) global i32 undef, align 4
36 ; CHECK: @X = internal addrspace(3) global i32 undef, align 4
37 ; CHECK: @QA1 = internal addrspace(3) global i32 undef, align 4
38 ; CHECK: @QB1 = internal addrspace(3) global i32 undef, align 4
39 ; CHECK: @QC1 = internal addrspace(3) global i32 undef, align 4
40 ; CHECK: @QD1 = internal addrspace(3) global i32 undef, align 4
41 ; CHECK: @QA2 = internal addrspace(3) global i32 undef, align 4
42 ; CHECK: @QB2 = internal addrspace(3) global i32 undef, align 4
43 ; CHECK: @QC2 = internal addrspace(3) global i32 undef, align 4
44 ; CHECK: @QD2 = internal addrspace(3) global i32 undef, align 4
45 ; CHECK: @QA3 = internal addrspace(3) global i32 undef, align 4
46 ; CHECK: @QB3 = internal addrspace(3) global i32 undef, align 4
47 ; CHECK: @QC3 = internal addrspace(3) global i32 undef, align 4
48 ; CHECK: @QD3 = internal addrspace(3) global i32 undef, align 4
49 ; CHECK: @UAA1 = internal addrspace(3) global i32 undef, align 4
50 ; CHECK: @UAA2 = internal addrspace(3) global i32 undef, align 4
51 ; CHECK: @UAA3 = internal addrspace(3) global i32 undef, align 4
52 ; CHECK: @UANA1 = internal addrspace(3) global i32 undef, align 4
53 ; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
54 ; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
; Kernel that stores 1 to @G and 2 to @H, then reloads both around calls to
; @barrier. The assertions below expect the load of @G to become the constant
; 1, the loads of @H to become 2 (so the assume operand folds to `true`), and
; none of the stores to @G or @H to remain; the @barrier calls are kept.
56 define void @kernel(ptr %dyn) "kernel" {
58 ; TUNIT: Function Attrs: norecurse
59 ; TUNIT-LABEL: define {{[^@]+}}@kernel
60 ; TUNIT-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
61 ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
62 ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
63 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
65 ; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
67 ; TUNIT-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
68 ; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7:[0-9]+]]
69 ; TUNIT-NEXT: call void @llvm.assume(i1 true)
70 ; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
71 ; TUNIT-NEXT: br label [[IF_MERGE]]
73 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
74 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
76 ; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
77 ; TUNIT-NEXT: br label [[IF_END]]
79 ; TUNIT-NEXT: call void @__kmpc_target_deinit()
80 ; TUNIT-NEXT: ret void
82 ; CGSCC: Function Attrs: norecurse
83 ; CGSCC-LABEL: define {{[^@]+}}@kernel
84 ; CGSCC-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
85 ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
86 ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
87 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
89 ; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
91 ; CGSCC-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
92 ; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
93 ; CGSCC-NEXT: call void @llvm.assume(i1 true)
94 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
95 ; CGSCC-NEXT: br label [[IF_MERGE]]
97 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
98 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
100 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
101 ; CGSCC-NEXT: br label [[IF_END]]
103 ; CGSCC-NEXT: call void @__kmpc_target_deinit()
104 ; CGSCC-NEXT: ret void
106 %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
107 %cmp = icmp eq i32 %call, -1
108 br i1 %cmp, label %if.then, label %if.else
110 store i32 1, ptr addrspace(3) @G
111 store i32 2, ptr addrspace(3) @H
114 call void @barrier();
115 %l = load i32, ptr addrspace(3) @G
116 call void @use1(i32 %l)
117 %hv = load i32, ptr addrspace(3) @H
118 %hc = icmp eq i32 %hv, 2
119 call void @llvm.assume(i1 %hc)
120 call void @barrier();
123 %hreload = load i32, ptr addrspace(3) @H
124 call void @use1(i32 %hreload)
125 br i1 %cmp, label %if.then2, label %if.end
127 store i32 2, ptr addrspace(3) @G
128 call void @barrier();
131 call void @__kmpc_target_deinit()
; Non-kernel function that assumes the generic-address-space cast of @str is
; non-null. The assertions expect the icmp and the assume to be emitted
; unchanged, i.e. the comparison is not folded.
135 define void @test_assume() {
136 ; CHECK-LABEL: define {{[^@]+}}@test_assume() {
137 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
138 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
139 ; CHECK-NEXT: ret void
141 %cmp = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
142 call void @llvm.assume(i1 %cmp)
146 ; We can't ignore the sync, hence this might store 2 into %p
; The assertions expect the output to keep the store of 1 to @X, the call to
; @sync, the load of @X, and the store of 2; the loaded value (not a constant)
; is what gets stored to %p.
147 define void @kernel2(ptr %p) "kernel" {
148 ; CHECK-LABEL: define {{[^@]+}}@kernel2
149 ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
150 ; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
151 ; CHECK-NEXT: call void @sync()
152 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
153 ; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
154 ; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
155 ; CHECK-NEXT: ret void
157 store i32 1, ptr addrspace(3) @X
159 %v = load i32, ptr addrspace(3) @X
160 store i32 2, ptr addrspace(3) @X
165 ; We can't ignore the sync, hence this might store 2 into %p
; Same shape as @kernel2, but the synchronization is reached through a call to
; @sync_def. The two run configurations differ only in the callee they expect:
; the module-pass run expects @sync_def.internalized, the cgscc run expects
; @sync_def. Both keep the two stores to @X and the load.
166 define void @kernel3(ptr %p) "kernel" {
167 ; TUNIT-LABEL: define {{[^@]+}}@kernel3
168 ; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
169 ; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
170 ; TUNIT-NEXT: call void @sync_def.internalized()
171 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
172 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
173 ; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
174 ; TUNIT-NEXT: ret void
176 ; CGSCC-LABEL: define {{[^@]+}}@kernel3
177 ; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
178 ; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
179 ; CGSCC-NEXT: call void @sync_def()
180 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
181 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
182 ; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
183 ; CGSCC-NEXT: ret void
185 store i32 1, ptr addrspace(3) @X
186 call void @sync_def()
187 %v = load i32, ptr addrspace(3) @X
188 store i32 2, ptr addrspace(3) @X
; Helper called from @kernel3; its expected body is a single call to @sync
; followed by a return.
193 define void @sync_def() {
194 ; CHECK-LABEL: define {{[^@]+}}@sync_def() {
195 ; CHECK-NEXT: call void @sync()
196 ; CHECK-NEXT: ret void
; Stores 0 to @QA1 and branches on %c: one path loads @QA1 and passes it to
; @use1, the other stores 2. The assertions expect a @sync call on both paths
; and expect both stores and the (unfolded) load to remain.
202 define void @kernel4a1(i1 %c) "kernel" {
203 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
204 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
205 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
206 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
208 ; TUNIT-NEXT: call void @sync()
209 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
210 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
211 ; TUNIT-NEXT: ret void
213 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
214 ; TUNIT-NEXT: call void @sync()
215 ; TUNIT-NEXT: ret void
217 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a1
218 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
219 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
220 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
222 ; CGSCC-NEXT: call void @sync()
223 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
224 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
225 ; CGSCC-NEXT: ret void
227 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
228 ; CGSCC-NEXT: call void @sync()
229 ; CGSCC-NEXT: ret void
231 store i32 0, ptr addrspace(3) @QA1
232 br i1 %c, label %S, label %L
235 %v = load i32, ptr addrspace(3) @QA1
236 call void @use1(i32 %v)
239 store i32 2, ptr addrspace(3) @QA1
244 ; We should not replace the load or delete the second store.
; Here the assertions show a @sync call only before the load; the path that
; stores 2 to @QB1 returns without one. Both stores and the load are expected
; to remain.
245 define void @kernel4b1(i1 %c) "kernel" {
246 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
247 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
248 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
249 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
251 ; TUNIT-NEXT: call void @sync()
252 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
253 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
254 ; TUNIT-NEXT: ret void
256 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
257 ; TUNIT-NEXT: ret void
259 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
260 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
261 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
262 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
264 ; CGSCC-NEXT: call void @sync()
265 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
266 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
267 ; CGSCC-NEXT: ret void
269 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
270 ; CGSCC-NEXT: ret void
272 store i32 0, ptr addrspace(3) @QB1
273 br i1 %c, label %S, label %L
276 %v = load i32, ptr addrspace(3) @QB1
277 call void @use1(i32 %v)
280 store i32 2, ptr addrspace(3) @QB1
; No initial store: @QA2 is undef-initialized and the only store shown writes
; 2. The assertions expect @use1 to receive the constant 2, the store to be
; removed, and the @sync calls on both paths to remain.
284 define void @kernel4a2(i1 %c) "kernel" {
285 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
286 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
287 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
289 ; TUNIT-NEXT: call void @sync()
290 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
291 ; TUNIT-NEXT: ret void
293 ; TUNIT-NEXT: call void @sync()
294 ; TUNIT-NEXT: ret void
296 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a2
297 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
298 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
300 ; CGSCC-NEXT: call void @sync()
301 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
302 ; CGSCC-NEXT: ret void
304 ; CGSCC-NEXT: call void @sync()
305 ; CGSCC-NEXT: ret void
307 br i1 %c, label %S, label %L
310 %v = load i32, ptr addrspace(3) @QA2
311 call void @use1(i32 %v)
314 store i32 2, ptr addrspace(3) @QA2
319 ; We should not replace the load with undef.
; The assertions below expect @use1 to receive the constant 2 (not undef)
; after the @sync call, and the store of 2 to @QB2 to be removed from the
; other path, which has no @sync call.
320 define void @kernel4b2(i1 %c) "kernel" {
321 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
322 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
323 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
325 ; TUNIT-NEXT: call void @sync()
326 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
327 ; TUNIT-NEXT: ret void
329 ; TUNIT-NEXT: ret void
331 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b2
332 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
333 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
335 ; CGSCC-NEXT: call void @sync()
336 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
337 ; CGSCC-NEXT: ret void
339 ; CGSCC-NEXT: ret void
341 br i1 %c, label %S, label %L
344 %v = load i32, ptr addrspace(3) @QB2
345 call void @use1(i32 %v)
348 store i32 2, ptr addrspace(3) @QB2
; Like @kernel4a1 but on @QA3, with four consecutive @sync calls expected
; after the store of 2. The assertions expect both stores and the (unfolded)
; load to remain.
352 define void @kernel4a3(i1 %c) "kernel" {
353 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
354 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
355 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
356 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
358 ; TUNIT-NEXT: call void @sync()
359 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
360 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
361 ; TUNIT-NEXT: ret void
363 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
364 ; TUNIT-NEXT: call void @sync()
365 ; TUNIT-NEXT: call void @sync()
366 ; TUNIT-NEXT: call void @sync()
367 ; TUNIT-NEXT: call void @sync()
368 ; TUNIT-NEXT: ret void
370 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a3
371 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
372 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
373 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
375 ; CGSCC-NEXT: call void @sync()
376 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
377 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
378 ; CGSCC-NEXT: ret void
380 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
381 ; CGSCC-NEXT: call void @sync()
382 ; CGSCC-NEXT: call void @sync()
383 ; CGSCC-NEXT: call void @sync()
384 ; CGSCC-NEXT: call void @sync()
385 ; CGSCC-NEXT: ret void
387 store i32 0, ptr addrspace(3) @QA3
388 br i1 %c, label %S, label %L
391 %v = load i32, ptr addrspace(3) @QA3
392 call void @use1(i32 %v)
395 store i32 2, ptr addrspace(3) @QA3
403 ; The load of QB3 should not be simplified to 0.
; On the path that stores 2, the store is followed by four calls to @use1
; (declared nosync) rather than by @sync. The assertions expect both stores
; and the load of @QB3 to remain.
404 define void @kernel4b3(i1 %c) "kernel" {
405 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
406 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
407 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
408 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
410 ; TUNIT-NEXT: call void @sync()
411 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
412 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
413 ; TUNIT-NEXT: ret void
415 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
416 ; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
417 ; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7]]
418 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
419 ; TUNIT-NEXT: call void @use1(i32 3) #[[ATTR7]]
420 ; TUNIT-NEXT: ret void
422 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b3
423 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
424 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
425 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
427 ; CGSCC-NEXT: call void @sync()
428 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
429 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
430 ; CGSCC-NEXT: ret void
432 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
433 ; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
434 ; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
435 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
436 ; CGSCC-NEXT: call void @use1(i32 3) #[[ATTR6]]
437 ; CGSCC-NEXT: ret void
439 store i32 0, ptr addrspace(3) @QB3
440 br i1 %c, label %S, label %L
443 %v = load i32, ptr addrspace(3) @QB3
444 call void @use1(i32 %v)
447 store i32 2, ptr addrspace(3) @QB3
448 call void @use1(i32 0)
449 call void @use1(i32 1)
450 call void @use1(i32 2)
451 call void @use1(i32 3)
; Variant that uses @barrier (declared with the
; "llvm.assume"="ompx_aligned_barrier" attribute) on both paths: before the
; load of @QC1 and after the store of 2. The assertions expect @use1 to
; receive the constant 0 and expect both stores and both barrier calls to be
; removed.
456 define void @kernel4c1(i1 %c) "kernel" {
457 ; TUNIT: Function Attrs: norecurse
458 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
459 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
460 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
462 ; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
463 ; TUNIT-NEXT: ret void
465 ; TUNIT-NEXT: ret void
467 ; CGSCC: Function Attrs: norecurse
468 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c1
469 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
470 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
472 ; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
473 ; CGSCC-NEXT: ret void
475 ; CGSCC-NEXT: ret void
477 store i32 0, ptr addrspace(3) @QC1
478 br i1 %c, label %S, label %L
480 call void @barrier();
481 %v = load i32, ptr addrspace(3) @QC1
482 call void @use1(i32 %v)
485 store i32 2, ptr addrspace(3) @QC1
486 call void @barrier();
490 ; We should not replace the load or delete the second store.
; The load of @QD1 is preceded by @barrier, but no barrier follows the store
; of 2 on the other path. The assertions expect the barrier, the load and
; both stores to remain.
491 define void @kernel4d1(i1 %c) "kernel" {
492 ; TUNIT: Function Attrs: norecurse
493 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
494 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
495 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
496 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
498 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
499 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
500 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
501 ; TUNIT-NEXT: ret void
503 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
504 ; TUNIT-NEXT: ret void
506 ; CGSCC: Function Attrs: norecurse
507 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d1
508 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
509 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
510 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
512 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
513 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
514 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
515 ; CGSCC-NEXT: ret void
517 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
518 ; CGSCC-NEXT: ret void
520 store i32 0, ptr addrspace(3) @QD1
521 br i1 %c, label %S, label %L
523 call void @barrier();
524 %v = load i32, ptr addrspace(3) @QD1
525 call void @use1(i32 %v)
528 store i32 2, ptr addrspace(3) @QD1
; No initial store: @QC2 is undef-initialized, the load is preceded by
; @barrier, and the store of 2 on the other path is followed by @barrier.
; The assertions expect @use1 to receive undef and expect the store and both
; barrier calls to be removed.
532 define void @kernel4c2(i1 %c) "kernel" {
533 ; TUNIT: Function Attrs: norecurse
534 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
535 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
536 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
538 ; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
539 ; TUNIT-NEXT: ret void
541 ; TUNIT-NEXT: ret void
543 ; CGSCC: Function Attrs: norecurse
544 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c2
545 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
546 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
548 ; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
549 ; CGSCC-NEXT: ret void
551 ; CGSCC-NEXT: ret void
553 br i1 %c, label %S, label %L
555 call void @barrier();
556 %v = load i32, ptr addrspace(3) @QC2
557 call void @use1(i32 %v)
560 store i32 2, ptr addrspace(3) @QC2
561 call void @barrier();
565 ; We should not replace the load with undef.
; No barrier follows the store of 2 to @QD2. The assertions expect @use1 to
; receive the constant 2 and expect neither the store nor the barrier before
; the load to remain.
566 define void @kernel4d2(i1 %c) "kernel" {
567 ; TUNIT: Function Attrs: norecurse
568 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
569 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
570 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
572 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
573 ; TUNIT-NEXT: ret void
575 ; TUNIT-NEXT: ret void
577 ; CGSCC: Function Attrs: norecurse
578 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d2
579 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
580 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
582 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
583 ; CGSCC-NEXT: ret void
585 ; CGSCC-NEXT: ret void
587 br i1 %c, label %S, label %L
589 call void @barrier();
590 %v = load i32, ptr addrspace(3) @QD2
591 call void @use1(i32 %v)
594 store i32 2, ptr addrspace(3) @QD2
; Same visible shape as @kernel4c2 but operating on @QC3: a barrier before
; the load and a barrier after the store of 2. The assertions expect @use1 to
; receive undef and expect the store and the barrier calls to be removed.
598 define void @kernel4c3(i1 %c) "kernel" {
599 ; TUNIT: Function Attrs: norecurse
600 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
601 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
602 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
604 ; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
605 ; TUNIT-NEXT: ret void
607 ; TUNIT-NEXT: ret void
609 ; CGSCC: Function Attrs: norecurse
610 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c3
611 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
612 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
614 ; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
615 ; CGSCC-NEXT: ret void
617 ; CGSCC-NEXT: ret void
619 br i1 %c, label %S, label %L
621 call void @barrier();
622 %v = load i32, ptr addrspace(3) @QC3
623 call void @use1(i32 %v)
626 store i32 2, ptr addrspace(3) @QC3
627 call void @barrier();
631 ; We should not replace the load with undef.
; Same visible shape as @kernel4d2 but operating on @QD3: no barrier follows
; the store of 2. The assertions expect @use1 to receive the constant 2 and
; expect neither the store nor the barrier before the load to remain.
632 define void @kernel4d3(i1 %c) "kernel" {
633 ; TUNIT: Function Attrs: norecurse
634 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
635 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
636 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
638 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
639 ; TUNIT-NEXT: ret void
641 ; TUNIT-NEXT: ret void
643 ; CGSCC: Function Attrs: norecurse
644 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d3
645 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
646 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
648 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
649 ; CGSCC-NEXT: ret void
651 ; CGSCC-NEXT: ret void
653 br i1 %c, label %S, label %L
655 call void @barrier();
656 %v = load i32, ptr addrspace(3) @QD3
657 call void @use1(i32 %v)
660 store i32 2, ptr addrspace(3) @QD3
; Mixes @sync and @barrier calls. The load of @UAA1 follows a @barrier call
; and the store of 2 is followed by @barrier. The assertions expect @use1 to
; receive the constant 2 with no barrier before it, the store to be removed,
; and the other path to keep @sync, @barrier, @sync in that order.
664 define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
665 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
666 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
667 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
669 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
670 ; TUNIT-NEXT: ret void
672 ; TUNIT-NEXT: call void @sync()
673 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
674 ; TUNIT-NEXT: call void @sync()
675 ; TUNIT-NEXT: ret void
677 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
678 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
679 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
681 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
682 ; CGSCC-NEXT: ret void
684 ; CGSCC-NEXT: call void @sync()
685 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
686 ; CGSCC-NEXT: call void @sync()
687 ; CGSCC-NEXT: ret void
689 br i1 %c, label %S, label %L
691 call void @barrier();
692 %v = load i32, ptr addrspace(3) @UAA1
693 call void @use1(i32 %v)
697 store i32 2, ptr addrspace(3) @UAA1
698 call void @barrier();
; Mixes @sync and @barrier calls on @UAA2. The assertions expect @sync and
; then @use1 with the constant 2 on the loading path, the store of 2 to be
; removed, and the storing path to keep @sync, @barrier, @sync in that order.
703 define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
704 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
705 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
706 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
708 ; TUNIT-NEXT: call void @sync()
709 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
710 ; TUNIT-NEXT: ret void
712 ; TUNIT-NEXT: call void @sync()
713 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
714 ; TUNIT-NEXT: call void @sync()
715 ; TUNIT-NEXT: ret void
717 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
718 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
719 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
721 ; CGSCC-NEXT: call void @sync()
722 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
723 ; CGSCC-NEXT: ret void
725 ; CGSCC-NEXT: call void @sync()
726 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
727 ; CGSCC-NEXT: call void @sync()
728 ; CGSCC-NEXT: ret void
730 br i1 %c, label %S, label %L
733 %v = load i32, ptr addrspace(3) @UAA2
734 call void @use1(i32 %v)
738 store i32 2, ptr addrspace(3) @UAA2
739 call void @barrier();
; Here the @barrier call comes after @use1 on the loading path. The
; assertions expect @sync, @use1 with the constant 2, then @barrier on that
; path, and two @sync calls with no store of 2 to @UAA3 on the other.
744 define void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
745 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
746 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
747 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
749 ; TUNIT-NEXT: call void @sync()
750 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
751 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
752 ; TUNIT-NEXT: ret void
754 ; TUNIT-NEXT: call void @sync()
755 ; TUNIT-NEXT: call void @sync()
756 ; TUNIT-NEXT: ret void
758 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
759 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
760 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
762 ; CGSCC-NEXT: call void @sync()
763 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
764 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
765 ; CGSCC-NEXT: ret void
767 ; CGSCC-NEXT: call void @sync()
768 ; CGSCC-NEXT: call void @sync()
769 ; CGSCC-NEXT: ret void
771 br i1 %c, label %S, label %L
774 %v = load i32, ptr addrspace(3) @UAA3
775 call void @use1(i32 %v)
776 call void @barrier();
780 store i32 2, ptr addrspace(3) @UAA3
; No @barrier calls appear in this kernel. The assertions expect @sync and
; then @use1 with the constant 2 on the loading path, and two @sync calls
; with no store of 2 to @UANA1 on the other.
785 define void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
786 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
787 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
788 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
790 ; TUNIT-NEXT: call void @sync()
791 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
792 ; TUNIT-NEXT: ret void
794 ; TUNIT-NEXT: call void @sync()
795 ; TUNIT-NEXT: call void @sync()
796 ; TUNIT-NEXT: ret void
798 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
799 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
800 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
802 ; CGSCC-NEXT: call void @sync()
803 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
804 ; CGSCC-NEXT: ret void
806 ; CGSCC-NEXT: call void @sync()
807 ; CGSCC-NEXT: call void @sync()
808 ; CGSCC-NEXT: ret void
810 br i1 %c, label %S, label %L
813 %v = load i32, ptr addrspace(3) @UANA1
814 call void @use1(i32 %v)
818 store i32 2, ptr addrspace(3) @UANA1
; External declarations used by the test functions.
; @barrier carries the "llvm.assume"="ompx_aligned_barrier" string attribute
; and, unlike @use1, is not declared nosync.
824 declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
; @use1 consumes a loaded (or folded) value so the tests can observe it.
825 declare void @use1(i32) nosync norecurse nounwind nocallback
; Runtime entry points called at the start and end of @kernel.
826 declare i32 @__kmpc_target_init(ptr, ptr) nocallback
827 declare void @__kmpc_target_deinit() nocallback
828 declare void @llvm.assume(i1)
830 !llvm.module.flags = !{!0, !1}
831 !nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20}
833 !0 = !{i32 7, !"openmp", i32 50}
834 !1 = !{i32 7, !"openmp-device", i32 50}
835 !2 = !{ptr @kernel, !"kernel", i32 1}
836 !3 = !{ptr @kernel2, !"kernel", i32 1}
837 !4 = !{ptr @kernel3, !"kernel", i32 1}
838 !5 = !{ptr @kernel4a1, !"kernel", i32 1}
839 !6 = !{ptr @kernel4b1, !"kernel", i32 1}
840 !7 = !{ptr @kernel4a2, !"kernel", i32 1}
841 !8 = !{ptr @kernel4b2, !"kernel", i32 1}
842 !9 = !{ptr @kernel4a3, !"kernel", i32 1}
843 !10 = !{ptr @kernel4b3, !"kernel", i32 1}
844 !11 = !{ptr @kernel4c1, !"kernel", i32 1}
845 !12 = !{ptr @kernel4d1, !"kernel", i32 1}
846 !13 = !{ptr @kernel4c2, !"kernel", i32 1}
847 !14 = !{ptr @kernel4d2, !"kernel", i32 1}
848 !15 = !{ptr @kernel4c3, !"kernel", i32 1}
849 !16 = !{ptr @kernel4d3, !"kernel", i32 1}
850 !17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
851 !18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
852 !19 = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
853 !20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
856 ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
857 ; TUNIT: attributes #[[ATTR1]] = { "kernel" }
858 ; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
859 ; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
860 ; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback }
861 ; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
862 ; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" }
863 ; TUNIT: attributes #[[ATTR7]] = { nounwind }
865 ; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
866 ; CGSCC: attributes #[[ATTR1]] = { "kernel" }
867 ; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
868 ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
869 ; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback }
870 ; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
871 ; CGSCC: attributes #[[ATTR6]] = { nounwind }
873 ; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
874 ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
875 ; TUNIT: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
876 ; TUNIT: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
877 ; TUNIT: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
878 ; TUNIT: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
879 ; TUNIT: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
880 ; TUNIT: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
881 ; TUNIT: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
882 ; TUNIT: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
883 ; TUNIT: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
884 ; TUNIT: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
885 ; TUNIT: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
886 ; TUNIT: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
887 ; TUNIT: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
888 ; TUNIT: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
889 ; TUNIT: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
890 ; TUNIT: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
891 ; TUNIT: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
892 ; TUNIT: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
893 ; TUNIT: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
895 ; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
896 ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
897 ; CGSCC: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
898 ; CGSCC: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
899 ; CGSCC: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
900 ; CGSCC: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
901 ; CGSCC: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
902 ; CGSCC: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
903 ; CGSCC: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
904 ; CGSCC: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
905 ; CGSCC: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
906 ; CGSCC: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
907 ; CGSCC: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
908 ; CGSCC: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
909 ; CGSCC: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
910 ; CGSCC: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
911 ; CGSCC: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
912 ; CGSCC: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
913 ; CGSCC: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
914 ; CGSCC: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
915 ; CGSCC: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}