1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2 ; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefixes=CHECK,MODULE
3 ; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefixes=CHECK,CGSCC
4 ; REQUIRES: amdgpu-registered-target
6 target triple = "amdgcn-amd-amdhsa"
8 declare void @useI32(i32)
9 declare void @unknown()
10 declare void @aligned_barrier() "llvm.assume"="ompx_aligned_barrier"
11 declare void @llvm.nvvm.barrier0()
12 declare i32 @llvm.nvvm.barrier0.and(i32)
13 declare i32 @llvm.nvvm.barrier0.or(i32)
14 declare i32 @llvm.nvvm.barrier0.popc(i32)
15 declare void @llvm.amdgcn.s.barrier()
16 declare void @llvm.assume(i1)
19 ; CHECK: @GC1 = constant i32 42
20 ; CHECK: @GC2 = addrspace(4) global i32 0
21 ; CHECK: @GPtr4 = addrspace(4) global ptr addrspace(4) null
22 ; CHECK: @G = global i32 42
23 ; CHECK: @GS = addrspace(3) global i32 0
24 ; CHECK: @GPtr = global ptr null
25 ; CHECK: @PG1 = thread_local global i32 42
26 ; CHECK: @PG2 = addrspace(5) global i32 0
27 ; CHECK: @GPtr5 = global ptr addrspace(5) null
28 ; CHECK: @G1 = global i32 42
29 ; CHECK: @G2 = addrspace(1) global i32 0
31 define void @pos_empty_1(i1 %c) "kernel" {
32 ; MODULE-LABEL: define {{[^@]+}}@pos_empty_1
33 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
34 ; MODULE-NEXT: ret void
36 ; CGSCC-LABEL: define {{[^@]+}}@pos_empty_1
37 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
38 ; CGSCC-NEXT: call void @llvm.assume(i1 [[C]])
39 ; CGSCC-NEXT: call void @unknown() #[[ATTR0:[0-9]+]]
40 ; CGSCC-NEXT: call void @llvm.assume(i1 [[C]])
41 ; CGSCC-NEXT: ret void
43 call void @llvm.assume(i1 %c)
44 call void @unknown() "llvm.assume"="ompx_aligned_barrier"
45 call void @llvm.assume(i1 %c)
48 define void @pos_empty_2() "kernel" {
49 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_2
50 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
51 ; CHECK-NEXT: ret void
53 call void @aligned_barrier()
56 define void @pos_empty_3() "kernel" {
57 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_3
58 ; CHECK-SAME: () #[[ATTR4]] {
59 ; CHECK-NEXT: ret void
61 call void @llvm.nvvm.barrier0()
64 define void @pos_empty_4() "kernel" {
65 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_4
66 ; CHECK-SAME: () #[[ATTR4]] {
67 ; CHECK-NEXT: ret void
69 call i32 @llvm.nvvm.barrier0.and(i32 0)
72 define void @pos_empty_5() "kernel" {
73 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_5
74 ; CHECK-SAME: () #[[ATTR4]] {
75 ; CHECK-NEXT: ret void
77 call i32 @llvm.nvvm.barrier0.or(i32 0)
80 define void @pos_empty_6() "kernel" {
81 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_6
82 ; CHECK-SAME: () #[[ATTR4]] {
83 ; CHECK-NEXT: ret void
85 call i32 @llvm.nvvm.barrier0.popc(i32 0)
88 define void @pos_empty_7a() "kernel" {
89 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a
90 ; CHECK-SAME: () #[[ATTR4]] {
91 ; CHECK-NEXT: call void @unknown()
92 ; CHECK-NEXT: ret void
94 call void @llvm.amdgcn.s.barrier()
98 ; FIXME: We should remove the barrier.
99 define void @pos_empty_7b() "kernel" {
100 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b
101 ; CHECK-SAME: () #[[ATTR4]] {
102 ; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
103 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
104 ; CHECK-NEXT: call void @unknown()
105 ; CHECK-NEXT: ret void
107 call void @unknown() nosync readnone
108 call void @llvm.amdgcn.s.barrier()
112 define void @pos_empty_8(i1 %c) "kernel" {
113 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_8
114 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
115 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
117 ; CHECK-NEXT: br label [[F]]
119 ; CHECK-NEXT: ret void
121 br i1 %c, label %t, label %f
124 call void @llvm.amdgcn.s.barrier() "llvm.assume"="ompx_aligned_barrier"
129 define void @neg_empty_8() "kernel" {
130 ; CHECK-LABEL: define {{[^@]+}}@neg_empty_8
131 ; CHECK-SAME: () #[[ATTR4]] {
132 ; CHECK-NEXT: call void @unknown()
133 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
134 ; CHECK-NEXT: ret void
137 call void @llvm.amdgcn.s.barrier()
140 define void @neg_empty_9(i1 %c) "kernel" {
141 ; CHECK-LABEL: define {{[^@]+}}@neg_empty_9
142 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
143 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
145 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
146 ; CHECK-NEXT: fence release
147 ; CHECK-NEXT: br label [[M:%.*]]
149 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
150 ; CHECK-NEXT: fence release
151 ; CHECK-NEXT: br label [[M]]
153 ; CHECK-NEXT: fence release
154 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
155 ; CHECK-NEXT: fence release
156 ; CHECK-NEXT: ret void
158 br i1 %c, label %t, label %f
161 call void @llvm.amdgcn.s.barrier()
166 call void @llvm.amdgcn.s.barrier()
171 call void @llvm.amdgcn.s.barrier()
175 ; FIXME: We should remove the barrier.
176 define void @pos_empty_10() "kernel" {
177 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_10
178 ; CHECK-SAME: () #[[ATTR4]] {
179 ; CHECK-NEXT: br label [[M:%.*]]
181 ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
182 ; CHECK-NEXT: ret void
186 call void @llvm.amdgcn.s.barrier()
189 define void @pos_empty_11() "kernel" {
190 ; CHECK-LABEL: define {{[^@]+}}@pos_empty_11
191 ; CHECK-SAME: () #[[ATTR4]] {
192 ; CHECK-NEXT: br label [[M:%.*]]
194 ; CHECK-NEXT: ret void
198 call void @aligned_barrier()
199 call void @llvm.amdgcn.s.barrier()
202 define void @empty() {
203 ; CHECK-LABEL: define {{[^@]+}}@empty() {
204 ; CHECK-NEXT: ret void
208 ; FIXME: We should remove the barrier at the end but not the first one.
209 define void @neg_empty_12(i1 %c) "kernel" {
210 ; MODULE-LABEL: define {{[^@]+}}@neg_empty_12
211 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
212 ; MODULE-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
214 ; MODULE-NEXT: call void @llvm.amdgcn.s.barrier()
215 ; MODULE-NEXT: br label [[M:%.*]]
217 ; MODULE-NEXT: br label [[M]]
219 ; MODULE-NEXT: call void @llvm.amdgcn.s.barrier()
220 ; MODULE-NEXT: ret void
222 ; CGSCC-LABEL: define {{[^@]+}}@neg_empty_12
223 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
224 ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
226 ; CGSCC-NEXT: call void @empty()
227 ; CGSCC-NEXT: call void @llvm.amdgcn.s.barrier()
228 ; CGSCC-NEXT: br label [[M:%.*]]
230 ; CGSCC-NEXT: call void @empty()
231 ; CGSCC-NEXT: br label [[M]]
233 ; CGSCC-NEXT: call void @llvm.amdgcn.s.barrier()
234 ; CGSCC-NEXT: ret void
236 br i1 %c, label %t, label %f
239 call void @llvm.amdgcn.s.barrier()
245 call void @llvm.amdgcn.s.barrier()
248 define void @neg_empty_1() "kernel" {
249 ; CHECK-LABEL: define {{[^@]+}}@neg_empty_1
250 ; CHECK-SAME: () #[[ATTR4]] {
251 ; CHECK-NEXT: call void @unknown()
252 ; CHECK-NEXT: ret void
257 define void @neg_empty_2() "kernel" {
258 ; CHECK-LABEL: define {{[^@]+}}@neg_empty_2
259 ; CHECK-SAME: () #[[ATTR4]] {
260 ; CHECK-NEXT: ret void
262 call void @aligned_barrier()
266 @GC1 = constant i32 42
267 @GC2 = addrspace(4) global i32 0
268 @GPtr4 = addrspace(4) global ptr addrspace(4) null
269 define void @pos_constant_loads() "kernel" {
270 ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads
271 ; CHECK-SAME: () #[[ATTR4]] {
272 ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8
273 ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(4) @GC2, align 4
274 ; CHECK-NEXT: [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4
275 ; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]]
276 ; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
277 ; CHECK-NEXT: call void @useI32(i32 [[E]])
278 ; CHECK-NEXT: ret void
280 %GPtr4c = addrspacecast ptr addrspace(4) @GPtr4 to ptr
281 %arg = load ptr addrspace(4), ptr %GPtr4c
282 %a = load i32, ptr @GC1
283 call void @aligned_barrier()
284 %GC2c = addrspacecast ptr addrspace(4) @GC2 to ptr
285 %b = load i32, ptr %GC2c
286 call void @aligned_barrier()
287 %argc = addrspacecast ptr addrspace(4) %arg to ptr
288 %c = load i32, ptr %argc
289 call void @aligned_barrier()
292 call void @useI32(i32 %e)
296 @GS = addrspace(3) global i32 0
297 @GPtr = global ptr null
298 ; TODO: We could remove some of the barriers due to the lack of write effects.
299 define void @neg_loads() "kernel" {
300 ; CHECK-LABEL: define {{[^@]+}}@neg_loads
301 ; CHECK-SAME: () #[[ATTR4]] {
302 ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
303 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4
304 ; CHECK-NEXT: call void @aligned_barrier()
305 ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4
306 ; CHECK-NEXT: call void @aligned_barrier()
307 ; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4
308 ; CHECK-NEXT: call void @aligned_barrier()
309 ; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[B]]
310 ; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
311 ; CHECK-NEXT: call void @useI32(i32 [[E]])
312 ; CHECK-NEXT: ret void
314 %arg = load ptr, ptr @GPtr
315 %a = load i32, ptr @G
316 call void @aligned_barrier()
317 %GSc = addrspacecast ptr addrspace(3) @GS to ptr
318 %b = load i32, ptr %GSc
319 call void @aligned_barrier()
320 %c = load i32, ptr %arg
321 call void @aligned_barrier()
324 call void @useI32(i32 %e)
327 @PG1 = thread_local global i32 42
328 @PG2 = addrspace(5) global i32 0
329 @GPtr5 = global ptr addrspace(5) null
330 define void @pos_priv_mem() "kernel" {
331 ; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem
332 ; CHECK-SAME: () #[[ATTR4]] {
333 ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4
334 ; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4, addrspace(5)
335 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4
336 ; CHECK-NEXT: store i32 [[A]], ptr addrspace(5) [[LOC]], align 4
337 ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4
338 ; CHECK-NEXT: store i32 [[B]], ptr addrspace(5) [[ARG]], align 4
339 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(5) [[LOC]], align 4
340 ; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4
341 ; CHECK-NEXT: ret void
343 %arg = load ptr addrspace(5), ptr @GPtr5
344 %loc = alloca i32, addrspace(5)
345 %a = load i32, ptr @PG1
346 call void @aligned_barrier()
347 store i32 %a, ptr addrspace(5) %loc
348 %PG2c = addrspacecast ptr addrspace(5) @PG2 to ptr
349 %b = load i32, ptr %PG2c
350 call void @aligned_barrier()
351 %argc = addrspacecast ptr addrspace(5) %arg to ptr
352 store i32 %b, ptr %argc
353 call void @aligned_barrier()
354 %v = load i32, ptr addrspace(5) %loc
355 store i32 %v, ptr @PG1
356 call void @aligned_barrier()
360 @G2 = addrspace(1) global i32 0
361 define void @neg_mem() "kernel" {
362 ; CHECK-LABEL: define {{[^@]+}}@neg_mem
363 ; CHECK-SAME: () #[[ATTR4]] {
364 ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
365 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4
366 ; CHECK-NEXT: fence seq_cst
367 ; CHECK-NEXT: call void @aligned_barrier()
368 ; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4
369 ; CHECK-NEXT: fence release
370 ; CHECK-NEXT: call void @aligned_barrier()
371 ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4
372 ; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4
373 ; CHECK-NEXT: fence acquire
374 ; CHECK-NEXT: ret void
376 %arg = load ptr, ptr @GPtr
377 %a = load i32, ptr @G1
379 call void @aligned_barrier()
380 store i32 %a, ptr %arg
382 call void @aligned_barrier()
383 %G2c = addrspacecast ptr addrspace(1) @G2 to ptr
384 %b = load i32, ptr %G2c
385 store i32 %b, ptr @G1
387 call void @aligned_barrier()
391 define void @pos_multiple() "kernel" {
392 ; CHECK-LABEL: define {{[^@]+}}@pos_multiple
393 ; CHECK-SAME: () #[[ATTR4]] {
394 ; CHECK-NEXT: ret void
396 call void @llvm.nvvm.barrier0()
397 call void @aligned_barrier()
398 call void @aligned_barrier()
399 call void @llvm.amdgcn.s.barrier()
400 call void @aligned_barrier()
401 call void @llvm.nvvm.barrier0()
402 call void @aligned_barrier()
403 call void @aligned_barrier()
407 define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
408 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1
409 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
410 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
412 ; CHECK-NEXT: br label [[T0B:%.*]]
414 ; CHECK-NEXT: br label [[M:%.*]]
416 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
418 ; CHECK-NEXT: br label [[M]]
420 ; CHECK-NEXT: br label [[M]]
422 ; CHECK-NEXT: ret void
425 call void @llvm.nvvm.barrier0()
427 call void @aligned_barrier()
429 br i1 %c0, label %t0, label %f0
432 call void @aligned_barrier()
437 call void @aligned_barrier()
442 call void @aligned_barrier()
444 call void @llvm.nvvm.barrier0()
446 br i1 %c1, label %t1, label %f1
449 call void @aligned_barrier()
454 call void @aligned_barrier()
459 call void @aligned_barrier()
464 define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
465 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2
466 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
467 ; CHECK-NEXT: store i32 4, ptr [[P]], align 4
468 ; CHECK-NEXT: call void @aligned_barrier()
469 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
471 ; CHECK-NEXT: br label [[T0B:%.*]]
473 ; CHECK-NEXT: br label [[M:%.*]]
475 ; CHECK-NEXT: store i32 4, ptr [[P]], align 4
476 ; CHECK-NEXT: call void @llvm.nvvm.barrier0()
477 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
479 ; CHECK-NEXT: br label [[M]]
481 ; CHECK-NEXT: br label [[M]]
483 ; CHECK-NEXT: store i32 4, ptr [[P]], align 4
484 ; CHECK-NEXT: ret void
486 call void @llvm.nvvm.barrier0()
488 call void @aligned_barrier()
489 br i1 %c0, label %t0, label %f0
491 call void @aligned_barrier()
494 call void @aligned_barrier()
497 call void @aligned_barrier()
499 call void @llvm.nvvm.barrier0()
500 br i1 %c1, label %t1, label %f1
502 call void @aligned_barrier()
505 call void @aligned_barrier()
509 call void @aligned_barrier()
513 define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) "kernel" {
514 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1
515 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
516 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
518 ; CHECK-NEXT: br label [[T0B:%.*]]
520 ; CHECK-NEXT: br label [[M:%.*]]
522 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
524 ; CHECK-NEXT: br label [[M]]
526 ; CHECK-NEXT: br label [[M]]
528 ; CHECK-NEXT: ret void
530 call void @llvm.nvvm.barrier0()
531 call void @aligned_barrier()
532 br i1 %c0, label %t0, label %f0
534 call void @aligned_barrier()
537 call void @aligned_barrier()
540 call void @aligned_barrier()
541 call void @llvm.nvvm.barrier0()
542 br i1 %c1, label %t1, label %f1
544 call void @aligned_barrier()
547 call void @aligned_barrier()
550 call void @aligned_barrier()
554 define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) "kernel" {
555 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2
556 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
557 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
559 ; CHECK-NEXT: br label [[T0B:%.*]]
561 ; CHECK-NEXT: br label [[M:%.*]]
563 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
565 ; CHECK-NEXT: br label [[M]]
567 ; CHECK-NEXT: br label [[M]]
569 ; CHECK-NEXT: ret void
571 br i1 %c0, label %t0, label %f0
573 call void @aligned_barrier()
576 call void @aligned_barrier()
579 call void @aligned_barrier()
580 call void @llvm.nvvm.barrier0()
581 br i1 %c1, label %t1, label %f1
583 call void @aligned_barrier()
586 call void @aligned_barrier()
589 call void @aligned_barrier()
593 define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) "kernel" {
594 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3
595 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
596 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
598 ; CHECK-NEXT: br label [[T0B:%.*]]
600 ; CHECK-NEXT: br label [[M:%.*]]
602 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
604 ; CHECK-NEXT: br label [[M]]
606 ; CHECK-NEXT: br label [[M]]
608 ; CHECK-NEXT: ret void
610 br i1 %c0, label %t0, label %f0
616 call void @aligned_barrier()
617 call void @llvm.nvvm.barrier0()
618 br i1 %c1, label %t1, label %f1
620 call void @aligned_barrier()
623 call void @aligned_barrier()
626 call void @aligned_barrier()
630 define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) "kernel" {
631 ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1
632 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
633 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
634 ; CHECK-NEXT: call void @aligned_barrier()
635 ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
637 ; CHECK-NEXT: store i32 1, ptr [[P]], align 4
638 ; CHECK-NEXT: br label [[T0B:%.*]]
640 ; CHECK-NEXT: call void @aligned_barrier()
641 ; CHECK-NEXT: br label [[M:%.*]]
643 ; CHECK-NEXT: store i32 2, ptr [[P]], align 4
644 ; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
646 ; CHECK-NEXT: call void @aligned_barrier()
647 ; CHECK-NEXT: br label [[M]]
649 ; CHECK-NEXT: call void @aligned_barrier()
650 ; CHECK-NEXT: br label [[M]]
652 ; CHECK-NEXT: store i32 3, ptr [[P]], align 4
653 ; CHECK-NEXT: ret void
655 call void @aligned_barrier()
657 call void @aligned_barrier()
658 br i1 %c0, label %t0, label %f0
660 call void @aligned_barrier()
664 call void @aligned_barrier()
667 call void @aligned_barrier()
668 call void @llvm.nvvm.barrier0()
670 br i1 %c1, label %t1, label %f1
672 call void @aligned_barrier()
675 call void @aligned_barrier()
678 call void @aligned_barrier()
680 call void @aligned_barrier()
684 define internal void @write_then_barrier0(ptr %p) {
685 ; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
686 ; CHECK-SAME: (ptr [[P:%.*]]) {
687 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
688 ; CHECK-NEXT: call void @aligned_barrier()
689 ; CHECK-NEXT: ret void
692 call void @aligned_barrier()
695 define internal void @barrier_then_write0(ptr %p) {
696 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
697 ; MODULE-SAME: (ptr [[P:%.*]]) {
698 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
699 ; MODULE-NEXT: ret void
701 ; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
702 ; CGSCC-SAME: (ptr [[P:%.*]]) {
703 ; CGSCC-NEXT: call void @aligned_barrier()
704 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
705 ; CGSCC-NEXT: ret void
707 call void @aligned_barrier()
711 define internal void @barrier_then_write_then_barrier0(ptr %p) {
712 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
713 ; MODULE-SAME: (ptr [[P:%.*]]) {
714 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
715 ; MODULE-NEXT: call void @aligned_barrier()
716 ; MODULE-NEXT: ret void
718 ; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
719 ; CGSCC-SAME: (ptr [[P:%.*]]) {
720 ; CGSCC-NEXT: call void @aligned_barrier()
721 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
722 ; CGSCC-NEXT: call void @aligned_barrier()
723 ; CGSCC-NEXT: ret void
725 call void @aligned_barrier()
727 call void @aligned_barrier()
730 define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" {
731 ; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
732 ; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
733 ; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]])
734 ; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
736 ; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]])
737 ; MODULE-NEXT: br label [[T0B3:%.*]]
739 ; MODULE-NEXT: br label [[M3:%.*]]
741 ; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]])
742 ; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
744 ; MODULE-NEXT: br label [[M3]]
746 ; MODULE-NEXT: br label [[M3]]
748 ; MODULE-NEXT: call void @write_then_barrier0(ptr [[P]])
749 ; MODULE-NEXT: ret void
751 ; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
752 ; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
753 ; CGSCC-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]])
754 ; CGSCC-NEXT: call void @aligned_barrier()
755 ; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
757 ; CGSCC-NEXT: call void @barrier_then_write0(ptr [[P]])
758 ; CGSCC-NEXT: br label [[T0B3:%.*]]
760 ; CGSCC-NEXT: call void @aligned_barrier()
761 ; CGSCC-NEXT: br label [[M3:%.*]]
763 ; CGSCC-NEXT: call void @barrier_then_write0(ptr [[P]])
764 ; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
766 ; CGSCC-NEXT: call void @aligned_barrier()
767 ; CGSCC-NEXT: br label [[M3]]
769 ; CGSCC-NEXT: call void @aligned_barrier()
770 ; CGSCC-NEXT: br label [[M3]]
772 ; CGSCC-NEXT: call void @write_then_barrier0(ptr [[P]])
773 ; CGSCC-NEXT: ret void
775 call void @barrier_then_write_then_barrier0(ptr %p)
776 call void @aligned_barrier()
777 br i1 %c0, label %t03, label %f03
779 call void @barrier_then_write0(ptr %p)
782 call void @aligned_barrier()
785 call void @aligned_barrier()
786 call void @barrier_then_write0(ptr %p)
787 br i1 %c1, label %t13, label %f13
789 call void @aligned_barrier()
792 call void @aligned_barrier()
795 call void @aligned_barrier()
796 call void @write_then_barrier0(ptr %p)
799 define internal void @write_then_barrier1(ptr %p) {
800 ; CHECK-LABEL: define {{[^@]+}}@write_then_barrier1
801 ; CHECK-SAME: (ptr [[P:%.*]]) {
802 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
803 ; CHECK-NEXT: call void @aligned_barrier()
804 ; CHECK-NEXT: ret void
807 call void @aligned_barrier()
810 define internal void @barrier_then_write1(ptr %p) {
811 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write1
812 ; MODULE-SAME: (ptr [[P:%.*]]) {
813 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
814 ; MODULE-NEXT: ret void
816 ; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write1
817 ; CGSCC-SAME: (ptr [[P:%.*]]) {
818 ; CGSCC-NEXT: call void @aligned_barrier()
819 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
820 ; CGSCC-NEXT: ret void
822 call void @aligned_barrier()
826 define internal void @barrier_then_write_then_barrier1(ptr %p) {
827 ; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier1
828 ; CHECK-SAME: (ptr [[P:%.*]]) {
829 ; CHECK-NEXT: call void @aligned_barrier()
830 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
831 ; CHECK-NEXT: call void @aligned_barrier()
832 ; CHECK-NEXT: ret void
834 call void @aligned_barrier()
836 call void @aligned_barrier()
839 define void @multiple_blocks_functions_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) {
840 ; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
841 ; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
842 ; MODULE-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]])
843 ; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
845 ; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]])
846 ; MODULE-NEXT: br label [[T0B3:%.*]]
848 ; MODULE-NEXT: call void @aligned_barrier()
849 ; MODULE-NEXT: br label [[M3:%.*]]
851 ; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]])
852 ; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
854 ; MODULE-NEXT: call void @aligned_barrier()
855 ; MODULE-NEXT: br label [[M3]]
857 ; MODULE-NEXT: call void @aligned_barrier()
858 ; MODULE-NEXT: br label [[M3]]
860 ; MODULE-NEXT: call void @write_then_barrier1(ptr [[P]])
861 ; MODULE-NEXT: ret void
863 ; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
864 ; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
865 ; CGSCC-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]])
866 ; CGSCC-NEXT: call void @aligned_barrier()
867 ; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
869 ; CGSCC-NEXT: call void @barrier_then_write1(ptr [[P]])
870 ; CGSCC-NEXT: br label [[T0B3:%.*]]
872 ; CGSCC-NEXT: call void @aligned_barrier()
873 ; CGSCC-NEXT: br label [[M3:%.*]]
875 ; CGSCC-NEXT: call void @barrier_then_write1(ptr [[P]])
876 ; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
878 ; CGSCC-NEXT: call void @aligned_barrier()
879 ; CGSCC-NEXT: br label [[M3]]
881 ; CGSCC-NEXT: call void @aligned_barrier()
882 ; CGSCC-NEXT: br label [[M3]]
884 ; CGSCC-NEXT: call void @write_then_barrier1(ptr [[P]])
885 ; CGSCC-NEXT: ret void
887 call void @barrier_then_write_then_barrier1(ptr %p)
888 call void @aligned_barrier()
889 br i1 %c0, label %t03, label %f03
891 call void @barrier_then_write1(ptr %p)
894 call void @aligned_barrier()
897 call void @aligned_barrier()
898 call void @barrier_then_write1(ptr %p)
899 br i1 %c1, label %t13, label %f13
901 call void @aligned_barrier()
904 call void @aligned_barrier()
907 call void @aligned_barrier()
908 call void @write_then_barrier1(ptr %p)
912 define internal void @write_then_barrier2(ptr %p) {
913 ; CHECK-LABEL: define {{[^@]+}}@write_then_barrier2
914 ; CHECK-SAME: (ptr [[P:%.*]]) {
915 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
916 ; CHECK-NEXT: call void @aligned_barrier()
917 ; CHECK-NEXT: ret void
920 call void @aligned_barrier()
923 define internal void @barrier_then_write2(ptr %p) {
924 ; CHECK-LABEL: define {{[^@]+}}@barrier_then_write2
925 ; CHECK-SAME: (ptr [[P:%.*]]) {
926 ; CHECK-NEXT: call void @aligned_barrier()
927 ; CHECK-NEXT: store i32 0, ptr [[P]], align 4
928 ; CHECK-NEXT: ret void
930 call void @aligned_barrier()
934 define internal void @barrier_then_write_then_barrier2(ptr %p) {
935 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
936 ; MODULE-SAME: (ptr [[P:%.*]]) {
937 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
938 ; MODULE-NEXT: call void @aligned_barrier()
939 ; MODULE-NEXT: ret void
941 ; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
942 ; CGSCC-SAME: (ptr [[P:%.*]]) {
943 ; CGSCC-NEXT: call void @aligned_barrier()
944 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
945 ; CGSCC-NEXT: call void @aligned_barrier()
946 ; CGSCC-NEXT: ret void
948 call void @aligned_barrier()
950 call void @aligned_barrier()
953 define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
954 ; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
955 ; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
956 ; MODULE-NEXT: call void @barrier_then_write_then_barrier2(ptr [[P]])
957 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
958 ; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
960 ; MODULE-NEXT: call void @barrier_then_write2(ptr [[P]])
961 ; MODULE-NEXT: br label [[T0B3:%.*]]
963 ; MODULE-NEXT: call void @aligned_barrier()
964 ; MODULE-NEXT: br label [[M3:%.*]]
966 ; MODULE-NEXT: call void @aligned_barrier()
967 ; MODULE-NEXT: call void @barrier_then_write2(ptr [[P]])
968 ; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
970 ; MODULE-NEXT: call void @aligned_barrier()
971 ; MODULE-NEXT: br label [[M3]]
973 ; MODULE-NEXT: call void @aligned_barrier()
974 ; MODULE-NEXT: br label [[M3]]
976 ; MODULE-NEXT: call void @write_then_barrier2(ptr [[P]])
977 ; MODULE-NEXT: store i32 0, ptr [[P]], align 4
978 ; MODULE-NEXT: ret void
980 ; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
981 ; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
982 ; CGSCC-NEXT: call void @barrier_then_write_then_barrier2(ptr [[P]])
983 ; CGSCC-NEXT: call void @aligned_barrier()
984 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
985 ; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
987 ; CGSCC-NEXT: call void @barrier_then_write2(ptr [[P]])
988 ; CGSCC-NEXT: br label [[T0B3:%.*]]
990 ; CGSCC-NEXT: call void @aligned_barrier()
991 ; CGSCC-NEXT: br label [[M3:%.*]]
993 ; CGSCC-NEXT: call void @aligned_barrier()
994 ; CGSCC-NEXT: call void @barrier_then_write2(ptr [[P]])
995 ; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
997 ; CGSCC-NEXT: call void @aligned_barrier()
998 ; CGSCC-NEXT: br label [[M3]]
1000 ; CGSCC-NEXT: call void @aligned_barrier()
1001 ; CGSCC-NEXT: br label [[M3]]
1003 ; CGSCC-NEXT: call void @write_then_barrier2(ptr [[P]])
1004 ; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
1005 ; CGSCC-NEXT: ret void
1007 call void @barrier_then_write_then_barrier2(ptr %p)
1008 call void @aligned_barrier()
1010 br i1 %c0, label %t03, label %f03
1012 call void @barrier_then_write2(ptr %p)
1015 call void @aligned_barrier()
1018 call void @aligned_barrier()
1019 call void @barrier_then_write2(ptr %p)
1020 br i1 %c1, label %t13, label %f13
1022 call void @aligned_barrier()
1025 call void @aligned_barrier()
1028 call void @aligned_barrier()
1029 call void @write_then_barrier2(ptr %p)
1034 ; Verify we do not remove the barrier in the callee.
1035 define internal void @callee_barrier() {
1036 ; CHECK-LABEL: define {{[^@]+}}@callee_barrier() {
1037 ; CHECK-NEXT: call void @aligned_barrier()
1038 ; CHECK-NEXT: ret void
1040 call void @aligned_barrier()
1043 define void @caller_barrier1() "kernel" {
1044 ; CHECK-LABEL: define {{[^@]+}}@caller_barrier1
1045 ; CHECK-SAME: () #[[ATTR4]] {
1046 ; CHECK-NEXT: call void @callee_barrier()
1047 ; CHECK-NEXT: ret void
1049 call void @aligned_barrier()
1050 call void @callee_barrier()
1051 call void @aligned_barrier()
1054 define void @caller_barrier2() "kernel" {
1055 ; CHECK-LABEL: define {{[^@]+}}@caller_barrier2
1056 ; CHECK-SAME: () #[[ATTR4]] {
1057 ; CHECK-NEXT: call void @unknown()
1058 ; CHECK-NEXT: call void @callee_barrier()
1059 ; CHECK-NEXT: call void @unknown()
1060 ; CHECK-NEXT: ret void
1062 call void @unknown()
1063 call void @callee_barrier()
1064 call void @unknown()
1068 define void @loop_barrier() "kernel" {
1069 ; CHECK-LABEL: define {{[^@]+}}@loop_barrier
1070 ; CHECK-SAME: () #[[ATTR4]] {
1071 ; CHECK-NEXT: entry:
1072 ; CHECK-NEXT: br label [[LOOP:%.*]]
1074 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1075 ; CHECK-NEXT: call void @unknown()
1076 ; CHECK-NEXT: call void @aligned_barrier()
1077 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1078 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1079 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1081 ; CHECK-NEXT: ret void
1087 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1088 call void @unknown()
1089 call void @aligned_barrier()
1090 %i.next = add nuw nsw i32 %i, 1
1091 %cond = icmp ne i32 %i.next, 128
1092 br i1 %cond, label %loop, label %exit
1098 define void @loop_barrier_end_barriers() "kernel" {
1099 ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
1100 ; CHECK-SAME: () #[[ATTR4]] {
1101 ; CHECK-NEXT: entry:
1102 ; CHECK-NEXT: br label [[LOOP:%.*]]
1104 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1105 ; CHECK-NEXT: call void @unknown()
1106 ; CHECK-NEXT: call void @aligned_barrier()
1107 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1108 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1109 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1111 ; CHECK-NEXT: ret void
1117 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1118 call void @unknown()
1119 call void @aligned_barrier()
1120 %i.next = add nuw nsw i32 %i, 1
1121 %cond = icmp ne i32 %i.next, 128
1122 br i1 %cond, label %loop, label %exit
1125 call void @aligned_barrier()
1126 call void @aligned_barrier()
1127 call void @aligned_barrier()
1128 call void @aligned_barrier()
; Kernel test: same loop as @loop_barrier_end_barriers (@unknown followed by
; @aligned_barrier, 128 iterations), but the tail after the loop places a call
; to @unknown between two pairs of @aligned_barrier calls.
; The autogenerated assertions below keep the in-loop barrier and expect the
; tail to contain only the @unknown call followed by `ret void`.
1132 define void @loop_barrier_end_barriers_unknown() "kernel" {
1133 ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
1134 ; CHECK-SAME: () #[[ATTR4]] {
1135 ; CHECK-NEXT: entry:
1136 ; CHECK-NEXT: br label [[LOOP:%.*]]
1138 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1139 ; CHECK-NEXT: call void @unknown()
1140 ; CHECK-NEXT: call void @aligned_barrier()
1141 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1142 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1143 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1145 ; CHECK-NEXT: call void @unknown()
1146 ; CHECK-NEXT: ret void
; Loop body: opaque call, then an aligned barrier, then the counter update.
1152 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1153 call void @unknown()
1154 call void @aligned_barrier()
1155 %i.next = add nuw nsw i32 %i, 1
1156 %cond = icmp ne i32 %i.next, 128
1157 br i1 %cond, label %loop, label %exit
; Tail of the kernel: barrier, barrier, opaque call, barrier, barrier.
1160 call void @aligned_barrier()
1161 call void @aligned_barrier()
1162 call void @unknown()
1163 call void @aligned_barrier()
1164 call void @aligned_barrier()
; Kernel test: a loop that runs 128 times stores the induction variable to the
; global @G1 and then calls @aligned_barrier on every iteration.
; The autogenerated assertions below expect both the store and the in-loop
; barrier to be present in the output.
1168 define void @loop_barrier_store() "kernel" {
1169 ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
1170 ; CHECK-SAME: () #[[ATTR4]] {
1171 ; CHECK-NEXT: entry:
1172 ; CHECK-NEXT: br label [[LOOP:%.*]]
1174 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1175 ; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
1176 ; CHECK-NEXT: call void @aligned_barrier()
1177 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1178 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1179 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1181 ; CHECK-NEXT: ret void
; Loop body: write the current counter to @G1, synchronize, advance the counter.
1187 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1188 store i32 %i, ptr @G1
1189 call void @aligned_barrier()
1190 %i.next = add nuw nsw i32 %i, 1
1191 %cond = icmp ne i32 %i.next, 128
1192 br i1 %cond, label %loop, label %exit
; Kernel test: same loop as @loop_barrier_store (store to @G1 followed by
; @aligned_barrier, 128 iterations), but the tail after the loop places a store
; of the final counter value to @G1 between two pairs of @aligned_barrier calls.
; The autogenerated assertions below keep the in-loop store and barrier and
; expect the tail to contain only the store followed by `ret void`.
1198 define void @loop_barrier_end_barriers_store() "kernel" {
1199 ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
1200 ; CHECK-SAME: () #[[ATTR4]] {
1201 ; CHECK-NEXT: entry:
1202 ; CHECK-NEXT: br label [[LOOP:%.*]]
1204 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1205 ; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
1206 ; CHECK-NEXT: call void @aligned_barrier()
1207 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1208 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1209 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1211 ; CHECK-NEXT: store i32 [[I_NEXT]], ptr @G1, align 4
1212 ; CHECK-NEXT: ret void
; Loop body: write the current counter to @G1, synchronize, advance the counter.
1218 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1219 store i32 %i, ptr @G1
1220 call void @aligned_barrier()
1221 %i.next = add nuw nsw i32 %i, 1
1222 %cond = icmp ne i32 %i.next, 128
1223 br i1 %cond, label %loop, label %exit
; Tail of the kernel: barrier, barrier, store of %i.next, barrier, barrier.
1226 call void @aligned_barrier()
1227 call void @aligned_barrier()
1228 store i32 %i.next, ptr @G1
1229 call void @aligned_barrier()
1230 call void @aligned_barrier()
; Module-level metadata for the test.
; !llvm.module.flags references the "openmp-device" (!16) and "openmp" (!15)
; flags, both carrying the value 50 (presumably the OpenMP version; confirm).
; !nvvm.annotations lists one node per test function, each tagging that
; function with the string "kernel" and the value 1.
; Node ids are not in numeric order in the text below; the autogenerated
; metadata assertions at the end of the file depend on this numbering, so do
; not renumber by hand.
1234 !llvm.module.flags = !{!16,!15}
1235 !nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30}
1237 !0 = !{ptr @pos_empty_1, !"kernel", i32 1}
1238 !1 = !{ptr @pos_empty_2, !"kernel", i32 1}
1239 !2 = !{ptr @pos_empty_3, !"kernel", i32 1}
1240 !3 = !{ptr @pos_empty_4, !"kernel", i32 1}
1241 !4 = !{ptr @pos_empty_5, !"kernel", i32 1}
1242 !5 = !{ptr @pos_empty_6, !"kernel", i32 1}
1243 !17 = !{ptr @pos_empty_7a, !"kernel", i32 1}
1244 !18 = !{ptr @pos_empty_7b, !"kernel", i32 1}
1245 !23 = !{ptr @pos_empty_8, !"kernel", i32 1}
1246 !24 = !{ptr @caller_barrier1, !"kernel", i32 1}
1247 !25 = !{ptr @caller_barrier2, !"kernel", i32 1}
1248 !26 = !{ptr @loop_barrier, !"kernel", i32 1}
1249 !27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
1250 !28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
1251 !29 = !{ptr @loop_barrier_store, !"kernel", i32 1}
1252 !30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
1253 !6 = !{ptr @neg_empty_8, !"kernel", i32 1}
1254 !19 = !{ptr @neg_empty_9, !"kernel", i32 1}
1255 !20 = !{ptr @pos_empty_10, !"kernel", i32 1}
1256 !21 = !{ptr @pos_empty_11, !"kernel", i32 1}
1257 !22 = !{ptr @neg_empty_12, !"kernel", i32 1}
1258 !7 = !{ptr @pos_constant_loads, !"kernel", i32 1}
1259 !8 = !{ptr @neg_loads, !"kernel", i32 1}
1260 !9 = !{ptr @pos_priv_mem, !"kernel", i32 1}
1261 !10 = !{ptr @neg_mem, !"kernel", i32 1}
1262 !11 = !{ptr @pos_multiple, !"kernel", i32 1}
1263 !12 = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
1264 !13 = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
1265 !14 = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
; Module flag payloads referenced by !llvm.module.flags above.
1266 !15 = !{i32 7, !"openmp", i32 50}
1267 !16 = !{i32 7, !"openmp-device", i32 50}
1269 ; MODULE: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
1270 ; MODULE: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
1271 ; MODULE: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
1272 ; MODULE: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
1273 ; MODULE: attributes #[[ATTR4]] = { "kernel" }
1274 ; MODULE: attributes #[[ATTR5]] = { nosync memory(none) }
1276 ; CGSCC: attributes #[[ATTR0]] = { "llvm.assume"="ompx_aligned_barrier" }
1277 ; CGSCC: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
1278 ; CGSCC: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
1279 ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
1280 ; CGSCC: attributes #[[ATTR4]] = { "kernel" }
1281 ; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) }
1283 ; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1284 ; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1285 ; MODULE: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
1286 ; MODULE: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
1287 ; MODULE: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
1288 ; MODULE: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
1289 ; MODULE: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
1290 ; MODULE: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
1291 ; MODULE: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
1292 ; MODULE: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
1293 ; MODULE: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
1294 ; MODULE: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
1295 ; MODULE: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
1296 ; MODULE: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
1297 ; MODULE: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
1298 ; MODULE: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
1299 ; MODULE: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
1300 ; MODULE: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
1301 ; MODULE: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
1302 ; MODULE: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
1303 ; MODULE: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
1304 ; MODULE: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
1305 ; MODULE: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
1306 ; MODULE: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
1307 ; MODULE: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
1308 ; MODULE: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
1309 ; MODULE: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
1310 ; MODULE: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
1311 ; MODULE: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
1312 ; MODULE: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
1313 ; MODULE: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
1315 ; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1316 ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1317 ; CGSCC: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
1318 ; CGSCC: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
1319 ; CGSCC: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
1320 ; CGSCC: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
1321 ; CGSCC: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
1322 ; CGSCC: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
1323 ; CGSCC: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
1324 ; CGSCC: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
1325 ; CGSCC: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
1326 ; CGSCC: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
1327 ; CGSCC: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
1328 ; CGSCC: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
1329 ; CGSCC: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
1330 ; CGSCC: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
1331 ; CGSCC: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
1332 ; CGSCC: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
1333 ; CGSCC: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
1334 ; CGSCC: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
1335 ; CGSCC: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
1336 ; CGSCC: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
1337 ; CGSCC: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
1338 ; CGSCC: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
1339 ; CGSCC: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
1340 ; CGSCC: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
1341 ; CGSCC: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
1342 ; CGSCC: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
1343 ; CGSCC: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
1344 ; CGSCC: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
1345 ; CGSCC: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}