1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
2 ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
3 ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
5 target triple = "amdgcn-amd-amdhsa"
7 %struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32 }
8 %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
10 @G = internal addrspace(3) global i32 undef, align 4
11 @H = internal addrspace(3) global i32 undef, align 4
12 @X = internal addrspace(3) global i32 undef, align 4
13 @QA1 = internal addrspace(3) global i32 undef, align 4
14 @QB1 = internal addrspace(3) global i32 undef, align 4
15 @QC1 = internal addrspace(3) global i32 undef, align 4
16 @QD1 = internal addrspace(3) global i32 undef, align 4
17 @QA2 = internal addrspace(3) global i32 undef, align 4
18 @QB2 = internal addrspace(3) global i32 undef, align 4
19 @QC2 = internal addrspace(3) global i32 undef, align 4
20 @QD2 = internal addrspace(3) global i32 undef, align 4
21 @QA3 = internal addrspace(3) global i32 undef, align 4
22 @QB3 = internal addrspace(3) global i32 undef, align 4
23 @QC3 = internal addrspace(3) global i32 undef, align 4
24 @QD3 = internal addrspace(3) global i32 undef, align 4
25 @UAA1 = internal addrspace(3) global i32 undef, align 4
26 @UAA2 = internal addrspace(3) global i32 undef, align 4
27 @UAA3 = internal addrspace(3) global i32 undef, align 4
28 @UANA1 = internal addrspace(3) global i32 undef, align 4
29 @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
30 @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
32 ; Make sure we do not delete the stores to @G without also replacing the load with `1`.
34 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
35 ; CHECK: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
36 ; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
37 ; CHECK: @[[QA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
38 ; CHECK: @[[QB1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
39 ; CHECK: @[[QC1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
40 ; CHECK: @[[QD1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
41 ; CHECK: @[[QA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
42 ; CHECK: @[[QB2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
43 ; CHECK: @[[QC2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
44 ; CHECK: @[[QD2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
45 ; CHECK: @[[QA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
46 ; CHECK: @[[QB3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
47 ; CHECK: @[[QC3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
48 ; CHECK: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
49 ; CHECK: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
50 ; CHECK: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
51 ; CHECK: @[[UAA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
52 ; CHECK: @[[UANA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
53 ; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
54 ; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
56 define void @kernel(ptr %dyn) "kernel" {
58 ; TUNIT: Function Attrs: norecurse
59 ; TUNIT-LABEL: define {{[^@]+}}@kernel
60 ; TUNIT-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
61 ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
62 ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
63 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
65 ; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
67 ; TUNIT-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
68 ; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7:[0-9]+]]
69 ; TUNIT-NEXT: call void @llvm.assume(i1 true)
70 ; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
71 ; TUNIT-NEXT: br label [[IF_MERGE]]
73 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
74 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
76 ; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
77 ; TUNIT-NEXT: br label [[IF_END]]
79 ; TUNIT-NEXT: call void @__kmpc_target_deinit()
80 ; TUNIT-NEXT: ret void
82 ; CGSCC: Function Attrs: norecurse
83 ; CGSCC-LABEL: define {{[^@]+}}@kernel
84 ; CGSCC-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
85 ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
86 ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
87 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
89 ; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
91 ; CGSCC-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
92 ; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
93 ; CGSCC-NEXT: call void @llvm.assume(i1 true)
94 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
95 ; CGSCC-NEXT: br label [[IF_MERGE]]
97 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
98 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
100 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
101 ; CGSCC-NEXT: br label [[IF_END]]
103 ; CGSCC-NEXT: call void @__kmpc_target_deinit()
104 ; CGSCC-NEXT: ret void
106 %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
107 %cmp = icmp eq i32 %call, -1
108 br i1 %cmp, label %if.then, label %if.else
110 store i32 1, ptr addrspace(3) @G
111 store i32 2, ptr addrspace(3) @H
114 call void @barrier();
115 %l = load i32, ptr addrspace(3) @G
116 call void @use1(i32 %l)
117 %hv = load i32, ptr addrspace(3) @H
118 %hc = icmp eq i32 %hv, 2
119 call void @llvm.assume(i1 %hc)
120 call void @barrier();
123 %hreload = load i32, ptr addrspace(3) @H
124 call void @use1(i32 %hreload)
125 br i1 %cmp, label %if.then2, label %if.end
127 store i32 2, ptr addrspace(3) @G
128 call void @barrier();
131 call void @__kmpc_target_deinit()
135 define void @test_assume() {
136 ; CHECK-LABEL: define {{[^@]+}}@test_assume() {
137 ; CHECK-NEXT: call void @llvm.assume(i1 icmp ne (ptr addrspacecast (ptr addrspace(4) @str to ptr), ptr null))
138 ; CHECK-NEXT: ret void
140 call void @llvm.assume(i1 icmp ne (ptr addrspacecast (ptr addrspace(4) @str to ptr), ptr null))
144 ; We can't ignore the sync, so the load of @X must be kept: the final store might write 2 (not 1) into %p.
145 define void @kernel2(ptr %p) "kernel" {
146 ; CHECK-LABEL: define {{[^@]+}}@kernel2
147 ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
148 ; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
149 ; CHECK-NEXT: call void @sync()
150 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
151 ; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
152 ; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
153 ; CHECK-NEXT: ret void
155 store i32 1, ptr addrspace(3) @X
157 %v = load i32, ptr addrspace(3) @X
158 store i32 2, ptr addrspace(3) @X
163 ; We can't ignore the sync (reached via @sync_def), so the load of @X must be kept: the final store might write 2 (not 1) into %p.
164 define void @kernel3(ptr %p) "kernel" {
165 ; TUNIT-LABEL: define {{[^@]+}}@kernel3
166 ; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
167 ; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
168 ; TUNIT-NEXT: call void @sync_def.internalized()
169 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
170 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
171 ; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
172 ; TUNIT-NEXT: ret void
174 ; CGSCC-LABEL: define {{[^@]+}}@kernel3
175 ; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
176 ; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
177 ; CGSCC-NEXT: call void @sync_def()
178 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
179 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
180 ; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
181 ; CGSCC-NEXT: ret void
183 store i32 1, ptr addrspace(3) @X
184 call void @sync_def()
185 %v = load i32, ptr addrspace(3) @X
186 store i32 2, ptr addrspace(3) @X
191 define void @sync_def() {
192 ; CHECK-LABEL: define {{[^@]+}}@sync_def() {
193 ; CHECK-NEXT: call void @sync()
194 ; CHECK-NEXT: ret void
200 define void @kernel4a1(i1 %c) "kernel" {
201 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
202 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
203 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
204 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
206 ; TUNIT-NEXT: call void @sync()
207 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
208 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
209 ; TUNIT-NEXT: ret void
211 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
212 ; TUNIT-NEXT: call void @sync()
213 ; TUNIT-NEXT: ret void
215 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a1
216 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
217 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
218 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
220 ; CGSCC-NEXT: call void @sync()
221 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
222 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
223 ; CGSCC-NEXT: ret void
225 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
226 ; CGSCC-NEXT: call void @sync()
227 ; CGSCC-NEXT: ret void
229 store i32 0, ptr addrspace(3) @QA1
230 br i1 %c, label %S, label %L
233 %v = load i32, ptr addrspace(3) @QA1
234 call void @use1(i32 %v)
237 store i32 2, ptr addrspace(3) @QA1
242 ; We should not replace the load or delete the second store.
243 define void @kernel4b1(i1 %c) "kernel" {
244 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
245 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
246 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
247 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
249 ; TUNIT-NEXT: call void @sync()
250 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
251 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
252 ; TUNIT-NEXT: ret void
254 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
255 ; TUNIT-NEXT: ret void
257 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
258 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
259 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
260 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
262 ; CGSCC-NEXT: call void @sync()
263 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
264 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
265 ; CGSCC-NEXT: ret void
267 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
268 ; CGSCC-NEXT: ret void
270 store i32 0, ptr addrspace(3) @QB1
271 br i1 %c, label %S, label %L
274 %v = load i32, ptr addrspace(3) @QB1
275 call void @use1(i32 %v)
278 store i32 2, ptr addrspace(3) @QB1
282 define void @kernel4a2(i1 %c) "kernel" {
283 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
284 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
285 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
287 ; TUNIT-NEXT: call void @sync()
288 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
289 ; TUNIT-NEXT: ret void
291 ; TUNIT-NEXT: call void @sync()
292 ; TUNIT-NEXT: ret void
294 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a2
295 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
296 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
298 ; CGSCC-NEXT: call void @sync()
299 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
300 ; CGSCC-NEXT: ret void
302 ; CGSCC-NEXT: call void @sync()
303 ; CGSCC-NEXT: ret void
305 br i1 %c, label %S, label %L
308 %v = load i32, ptr addrspace(3) @QA2
309 call void @use1(i32 %v)
312 store i32 2, ptr addrspace(3) @QA2
317 ; We should not replace the load with undef.
318 define void @kernel4b2(i1 %c) "kernel" {
319 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
320 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
321 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
323 ; TUNIT-NEXT: call void @sync()
324 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
325 ; TUNIT-NEXT: ret void
327 ; TUNIT-NEXT: ret void
329 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b2
330 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
331 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
333 ; CGSCC-NEXT: call void @sync()
334 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
335 ; CGSCC-NEXT: ret void
337 ; CGSCC-NEXT: ret void
339 br i1 %c, label %S, label %L
342 %v = load i32, ptr addrspace(3) @QB2
343 call void @use1(i32 %v)
346 store i32 2, ptr addrspace(3) @QB2
350 define void @kernel4a3(i1 %c) "kernel" {
351 ; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
352 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
353 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
354 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
356 ; TUNIT-NEXT: call void @sync()
357 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
358 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
359 ; TUNIT-NEXT: ret void
361 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
362 ; TUNIT-NEXT: call void @sync()
363 ; TUNIT-NEXT: call void @sync()
364 ; TUNIT-NEXT: call void @sync()
365 ; TUNIT-NEXT: call void @sync()
366 ; TUNIT-NEXT: ret void
368 ; CGSCC-LABEL: define {{[^@]+}}@kernel4a3
369 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
370 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
371 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
373 ; CGSCC-NEXT: call void @sync()
374 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
375 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
376 ; CGSCC-NEXT: ret void
378 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
379 ; CGSCC-NEXT: call void @sync()
380 ; CGSCC-NEXT: call void @sync()
381 ; CGSCC-NEXT: call void @sync()
382 ; CGSCC-NEXT: call void @sync()
383 ; CGSCC-NEXT: ret void
385 store i32 0, ptr addrspace(3) @QA3
386 br i1 %c, label %S, label %L
389 %v = load i32, ptr addrspace(3) @QA3
390 call void @use1(i32 %v)
393 store i32 2, ptr addrspace(3) @QA3
401 ; The load of QB3 should not be simplified to 0.
402 define void @kernel4b3(i1 %c) "kernel" {
403 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
404 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
405 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
406 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
408 ; TUNIT-NEXT: call void @sync()
409 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
410 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
411 ; TUNIT-NEXT: ret void
413 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
414 ; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
415 ; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7]]
416 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
417 ; TUNIT-NEXT: call void @use1(i32 3) #[[ATTR7]]
418 ; TUNIT-NEXT: ret void
420 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b3
421 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
422 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
423 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
425 ; CGSCC-NEXT: call void @sync()
426 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
427 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
428 ; CGSCC-NEXT: ret void
430 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
431 ; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
432 ; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
433 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
434 ; CGSCC-NEXT: call void @use1(i32 3) #[[ATTR6]]
435 ; CGSCC-NEXT: ret void
437 store i32 0, ptr addrspace(3) @QB3
438 br i1 %c, label %S, label %L
441 %v = load i32, ptr addrspace(3) @QB3
442 call void @use1(i32 %v)
445 store i32 2, ptr addrspace(3) @QB3
446 call void @use1(i32 0)
447 call void @use1(i32 1)
448 call void @use1(i32 2)
449 call void @use1(i32 3)
454 define void @kernel4c1(i1 %c) "kernel" {
455 ; TUNIT: Function Attrs: norecurse
456 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
457 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
458 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
460 ; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
461 ; TUNIT-NEXT: ret void
463 ; TUNIT-NEXT: ret void
465 ; CGSCC: Function Attrs: norecurse
466 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c1
467 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
468 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
470 ; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
471 ; CGSCC-NEXT: ret void
473 ; CGSCC-NEXT: ret void
475 store i32 0, ptr addrspace(3) @QC1
476 br i1 %c, label %S, label %L
478 call void @barrier();
479 %v = load i32, ptr addrspace(3) @QC1
480 call void @use1(i32 %v)
483 store i32 2, ptr addrspace(3) @QC1
484 call void @barrier();
488 ; We should not replace the load or delete the second store.
489 define void @kernel4d1(i1 %c) "kernel" {
490 ; TUNIT: Function Attrs: norecurse
491 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
492 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
493 ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
494 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
496 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
497 ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
498 ; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
499 ; TUNIT-NEXT: ret void
501 ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
502 ; TUNIT-NEXT: ret void
504 ; CGSCC: Function Attrs: norecurse
505 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d1
506 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
507 ; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
508 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
510 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
511 ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
512 ; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
513 ; CGSCC-NEXT: ret void
515 ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
516 ; CGSCC-NEXT: ret void
518 store i32 0, ptr addrspace(3) @QD1
519 br i1 %c, label %S, label %L
521 call void @barrier();
522 %v = load i32, ptr addrspace(3) @QD1
523 call void @use1(i32 %v)
526 store i32 2, ptr addrspace(3) @QD1
530 define void @kernel4c2(i1 %c) "kernel" {
531 ; TUNIT: Function Attrs: norecurse
532 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
533 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
534 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
536 ; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
537 ; TUNIT-NEXT: ret void
539 ; TUNIT-NEXT: ret void
541 ; CGSCC: Function Attrs: norecurse
542 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c2
543 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
544 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
546 ; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
547 ; CGSCC-NEXT: ret void
549 ; CGSCC-NEXT: ret void
551 br i1 %c, label %S, label %L
553 call void @barrier();
554 %v = load i32, ptr addrspace(3) @QC2
555 call void @use1(i32 %v)
558 store i32 2, ptr addrspace(3) @QC2
559 call void @barrier();
563 ; We should not replace the load with undef.
564 define void @kernel4d2(i1 %c) "kernel" {
565 ; TUNIT: Function Attrs: norecurse
566 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
567 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
568 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
570 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
571 ; TUNIT-NEXT: ret void
573 ; TUNIT-NEXT: ret void
575 ; CGSCC: Function Attrs: norecurse
576 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d2
577 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
578 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
580 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
581 ; CGSCC-NEXT: ret void
583 ; CGSCC-NEXT: ret void
585 br i1 %c, label %S, label %L
587 call void @barrier();
588 %v = load i32, ptr addrspace(3) @QD2
589 call void @use1(i32 %v)
592 store i32 2, ptr addrspace(3) @QD2
596 define void @kernel4c3(i1 %c) "kernel" {
597 ; TUNIT: Function Attrs: norecurse
598 ; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
599 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
600 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
602 ; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
603 ; TUNIT-NEXT: ret void
605 ; TUNIT-NEXT: ret void
607 ; CGSCC: Function Attrs: norecurse
608 ; CGSCC-LABEL: define {{[^@]+}}@kernel4c3
609 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
610 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
612 ; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
613 ; CGSCC-NEXT: ret void
615 ; CGSCC-NEXT: ret void
617 br i1 %c, label %S, label %L
619 call void @barrier();
620 %v = load i32, ptr addrspace(3) @QC3
621 call void @use1(i32 %v)
624 store i32 2, ptr addrspace(3) @QC3
625 call void @barrier();
629 ; We should not replace the load with undef.
630 define void @kernel4d3(i1 %c) "kernel" {
631 ; TUNIT: Function Attrs: norecurse
632 ; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
633 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
634 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
636 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
637 ; TUNIT-NEXT: ret void
639 ; TUNIT-NEXT: ret void
641 ; CGSCC: Function Attrs: norecurse
642 ; CGSCC-LABEL: define {{[^@]+}}@kernel4d3
643 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
644 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
646 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
647 ; CGSCC-NEXT: ret void
649 ; CGSCC-NEXT: ret void
651 br i1 %c, label %S, label %L
653 call void @barrier();
654 %v = load i32, ptr addrspace(3) @QD3
655 call void @use1(i32 %v)
658 store i32 2, ptr addrspace(3) @QD3
662 define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
663 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
664 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
665 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
667 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
668 ; TUNIT-NEXT: ret void
670 ; TUNIT-NEXT: call void @sync()
671 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
672 ; TUNIT-NEXT: call void @sync()
673 ; TUNIT-NEXT: ret void
675 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
676 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
677 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
679 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
680 ; CGSCC-NEXT: ret void
682 ; CGSCC-NEXT: call void @sync()
683 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
684 ; CGSCC-NEXT: call void @sync()
685 ; CGSCC-NEXT: ret void
687 br i1 %c, label %S, label %L
689 call void @barrier();
690 %v = load i32, ptr addrspace(3) @UAA1
691 call void @use1(i32 %v)
695 store i32 2, ptr addrspace(3) @UAA1
696 call void @barrier();
701 define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
702 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
703 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
704 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
706 ; TUNIT-NEXT: call void @sync()
707 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
708 ; TUNIT-NEXT: ret void
710 ; TUNIT-NEXT: call void @sync()
711 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
712 ; TUNIT-NEXT: call void @sync()
713 ; TUNIT-NEXT: ret void
715 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
716 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
717 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
719 ; CGSCC-NEXT: call void @sync()
720 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
721 ; CGSCC-NEXT: ret void
723 ; CGSCC-NEXT: call void @sync()
724 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
725 ; CGSCC-NEXT: call void @sync()
726 ; CGSCC-NEXT: ret void
728 br i1 %c, label %S, label %L
731 %v = load i32, ptr addrspace(3) @UAA2
732 call void @use1(i32 %v)
736 store i32 2, ptr addrspace(3) @UAA2
737 call void @barrier();
742 define void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
743 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
744 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
745 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
747 ; TUNIT-NEXT: call void @sync()
748 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
749 ; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
750 ; TUNIT-NEXT: ret void
752 ; TUNIT-NEXT: call void @sync()
753 ; TUNIT-NEXT: call void @sync()
754 ; TUNIT-NEXT: ret void
756 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
757 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
758 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
760 ; CGSCC-NEXT: call void @sync()
761 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
762 ; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
763 ; CGSCC-NEXT: ret void
765 ; CGSCC-NEXT: call void @sync()
766 ; CGSCC-NEXT: call void @sync()
767 ; CGSCC-NEXT: ret void
769 br i1 %c, label %S, label %L
772 %v = load i32, ptr addrspace(3) @UAA3
773 call void @use1(i32 %v)
774 call void @barrier();
778 store i32 2, ptr addrspace(3) @UAA3
783 define void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
784 ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
785 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
786 ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
788 ; TUNIT-NEXT: call void @sync()
789 ; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
790 ; TUNIT-NEXT: ret void
792 ; TUNIT-NEXT: call void @sync()
793 ; TUNIT-NEXT: call void @sync()
794 ; TUNIT-NEXT: ret void
796 ; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
797 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
798 ; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
800 ; CGSCC-NEXT: call void @sync()
801 ; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
802 ; CGSCC-NEXT: ret void
804 ; CGSCC-NEXT: call void @sync()
805 ; CGSCC-NEXT: call void @sync()
806 ; CGSCC-NEXT: ret void
808 br i1 %c, label %S, label %L
811 %v = load i32, ptr addrspace(3) @UANA1
812 call void @use1(i32 %v)
816 store i32 2, ptr addrspace(3) @UANA1
822 declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
823 declare void @use1(i32) nosync norecurse nounwind nocallback
824 declare i32 @__kmpc_target_init(ptr, ptr) nocallback
825 declare void @__kmpc_target_deinit() nocallback
826 declare void @llvm.assume(i1)
828 !llvm.module.flags = !{!0, !1}
829 !nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20}
831 !0 = !{i32 7, !"openmp", i32 50}
832 !1 = !{i32 7, !"openmp-device", i32 50}
833 !2 = !{ptr @kernel, !"kernel", i32 1}
834 !3 = !{ptr @kernel2, !"kernel", i32 1}
835 !4 = !{ptr @kernel3, !"kernel", i32 1}
836 !5 = !{ptr @kernel4a1, !"kernel", i32 1}
837 !6 = !{ptr @kernel4b1, !"kernel", i32 1}
838 !7 = !{ptr @kernel4a2, !"kernel", i32 1}
839 !8 = !{ptr @kernel4b2, !"kernel", i32 1}
840 !9 = !{ptr @kernel4a3, !"kernel", i32 1}
841 !10 = !{ptr @kernel4b3, !"kernel", i32 1}
842 !11 = !{ptr @kernel4c1, !"kernel", i32 1}
843 !12 = !{ptr @kernel4d1, !"kernel", i32 1}
844 !13 = !{ptr @kernel4c2, !"kernel", i32 1}
845 !14 = !{ptr @kernel4d2, !"kernel", i32 1}
846 !15 = !{ptr @kernel4c3, !"kernel", i32 1}
847 !16 = !{ptr @kernel4d3, !"kernel", i32 1}
848 !17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
849 !18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
850 !19 = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
851 !20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
854 ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
855 ; TUNIT: attributes #[[ATTR1]] = { "kernel" }
856 ; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
857 ; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
858 ; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback }
859 ; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
860 ; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" }
861 ; TUNIT: attributes #[[ATTR7]] = { nounwind }
863 ; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
864 ; CGSCC: attributes #[[ATTR1]] = { "kernel" }
865 ; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
866 ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
867 ; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback }
868 ; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
869 ; CGSCC: attributes #[[ATTR6]] = { nounwind }
871 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
872 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
873 ; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
874 ; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
875 ; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
876 ; CHECK: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
877 ; CHECK: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
878 ; CHECK: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
879 ; CHECK: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
880 ; CHECK: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
881 ; CHECK: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
882 ; CHECK: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
883 ; CHECK: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
884 ; CHECK: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
885 ; CHECK: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
886 ; CHECK: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
887 ; CHECK: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
888 ; CHECK: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
889 ; CHECK: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
890 ; CHECK: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
891 ; CHECK: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}