1 ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
3 ; temporal-divergent use of value carried by divergent loop
;
; Scenario: %uni.inc is a counter fed back through the phi %uni.merge.h
; (incoming 0 from %entry, %uni.inc from %H). The branch that either leaves
; toward %X or returns to %H is controlled by %div.exitx, a compare on the
; workitem id, so the analysis is expected to report that branch as divergent.
; %div.user consumes %uni.inc and is expected to be reported divergent as well.
;
; NOTE(review): block labels and some terminators of this function are not
; visible in this view; block placement is inferred from the phi and branch
; operands only -- confirm against the full file.
4 define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
5 ; CHECK-LABEL: for function 'temporal_diverge':
; The two negative directives below reject a DIVERGENT report for any value
; whose name starts with %uni. and for any branch on such a value (FileCheck
; applies them up to the next positive match).
6 ; CHECK-NOT: DIVERGENT: %uni.
7 ; CHECK-NOT: DIVERGENT: br i1 %uni.
; %tid is the only value in this function that comes from the workitem-id
; intrinsic; %uni.cond depends only on the kernel argument %a.
10 %tid = call i32 @llvm.amdgcn.workitem.id.x()
11 %uni.cond = icmp slt i32 %a, 0
; Loop-carried counter: starts at 0, incremented by 1 on every trip through %H.
15 %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
16 %uni.inc = add i32 %uni.merge.h, 1
17 ; CHECK: DIVERGENT: %div.exitx =
; Exit condition derived from %tid.
18 %div.exitx = icmp slt i32 %tid, 0
19 ; CHECK: DIVERGENT: br i1 %div.exitx,
20 br i1 %div.exitx, label %X, label %H ; divergent branch
23 ; CHECK: DIVERGENT: %div.user =
; Use of the loop counter that the test expects to be reported divergent.
; Presumably located in exit block %X -- label not visible here, TODO confirm.
24 %div.user = add i32 %uni.inc, 5
; Scenario: the value produced inside the %H loop reaches its user through a
; phi (%div.phi) that merges it with the constant 0 coming from %entry. The
; loop's back-edge/exit branch is controlled by %div.exitx (a compare on the
; workitem id), and the test expects %div.phi to be reported divergent.
;
; NOTE(review): block labels are not visible in this view; the phi is
; presumably in block %X (the target of both the entry branch and the loop
; exit) -- confirm against the full file.
28 define amdgpu_kernel void @phi_at_exit(i32 %n, i32 %a, i32 %b) #0 {
29 ; CHECK-LABEL: for function 'phi_at_exit':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
30 ; CHECK-NOT: DIVERGENT: %uni.
31 ; CHECK-NOT: DIVERGENT: br i1 %uni.
34 %tid = call i32 @llvm.amdgcn.workitem.id.x()
35 %uni.cond = icmp slt i32 %a, 0
; Branch on a condition computed from kernel argument %a only: either enter
; the loop at %H or go straight to %X.
36 br i1 %uni.cond, label %H, label %X
; Loop-carried counter: 0 on entry, +1 per trip through %H.
39 %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
40 %uni.inc = add i32 %uni.merge.h, 1
41 ; CHECK: DIVERGENT: %div.exitx =
42 %div.exitx = icmp slt i32 %tid, 0
43 ; CHECK: DIVERGENT: br i1 %div.exitx,
44 br i1 %div.exitx, label %X, label %H ; divergent branch
47 ; CHECK: DIVERGENT: %div.phi =
; Merge of the constant from %entry with the loop counter arriving from %H.
48 %div.phi = phi i32 [ 0, %entry], [ %uni.inc, %H ]
; No directive in this function constrains how %div.user is reported.
49 %div.user = add i32 %div.phi, 5
; Scenario: like a phi placed at the loop exit, except that the loop counter
; reaches the merging phi via the intermediate block %X rather than directly
; from %H: %div.phi takes 0 from %entry and %uni.inc from %X. The test expects
; %div.phi to be reported divergent.
;
; NOTE(review): block labels and the terminator of %X are not visible in this
; view; the phi is presumably in block %Y (target of the entry branch) --
; confirm against the full file.
53 define amdgpu_kernel void @phi_after_exit(i32 %n, i32 %a, i32 %b) #0 {
54 ; CHECK-LABEL: for function 'phi_after_exit':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
55 ; CHECK-NOT: DIVERGENT: %uni.
56 ; CHECK-NOT: DIVERGENT: br i1 %uni.
59 %tid = call i32 @llvm.amdgcn.workitem.id.x()
60 %uni.cond = icmp slt i32 %a, 0
; Branch on a condition computed from kernel argument %a only: either enter
; the loop at %H or skip to %Y.
61 br i1 %uni.cond, label %H, label %Y
; Loop-carried counter: 0 on entry, +1 per trip through %H.
64 %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
65 %uni.inc = add i32 %uni.merge.h, 1
66 ; CHECK: DIVERGENT: %div.exitx =
67 %div.exitx = icmp slt i32 %tid, 0
68 ; CHECK: DIVERGENT: br i1 %div.exitx,
69 br i1 %div.exitx, label %X, label %H ; divergent branch
75 ; CHECK: DIVERGENT: %div.phi =
; Merge of the constant from %entry with the loop counter arriving via %X.
76 %div.phi = phi i32 [ 0, %entry], [ %uni.inc, %X ]
; No directive in this function constrains how %div.user is reported.
77 %div.user = add i32 %div.phi, 5
81 ; temporal-divergent use of value carried by divergent loop inside a top-level loop
;
; Scenario: the counter loop at %H (phi incoming 0 from %G, %uni.inc from %H)
; is left through a branch on %div.exitx, which is derived from the workitem
; id. A later branch on %uni.cond targets %G again or %Y, so the %H loop sits
; inside an outer loop headed at %G. Two users of %uni.inc are checked:
; %div.user and %div.alsouser; both are expected to be reported divergent.
;
; NOTE(review): block labels are not visible in this view; which block each
; user lives in is inferred from branch targets only -- confirm against the
; full file.
82 define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
83 ; CHECK-LABEL: for function 'temporal_diverge_inloop':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
84 ; CHECK-NOT: DIVERGENT: %uni.
85 ; CHECK-NOT: DIVERGENT: br i1 %uni.
88 %tid = call i32 @llvm.amdgcn.workitem.id.x()
89 %uni.cond = icmp slt i32 %a, 0
; Inner-loop counter: restarts at 0 each time control arrives from %G.
96 %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
97 %uni.inc = add i32 %uni.merge.h, 1
98 %div.exitx = icmp slt i32 %tid, 0
99 br i1 %div.exitx, label %X, label %H ; divergent branch
; In this function the directives follow the instructions they describe.
100 ; CHECK: DIVERGENT: %div.exitx =
101 ; CHECK: DIVERGENT: br i1 %div.exitx,
104 ; CHECK: DIVERGENT: %div.user =
; First user of the inner-loop counter; followed by the branch on %uni.cond
; that either re-enters %G or leaves to %Y.
105 %div.user = add i32 %uni.inc, 5
106 br i1 %uni.cond, label %G, label %Y
109 ; CHECK: DIVERGENT: %div.alsouser =
; Second user of the same counter, computing the same expression.
; Presumably in block %Y, outside the outer loop -- TODO confirm.
110 %div.alsouser = add i32 %uni.inc, 5
115 ; temporal-uniform use of a value, definition and users are carried by a
116 ; surrounding divergent loop
;
; Scenario: here the counter loop at %H is left through a branch on %uni.cond
; (computed from kernel argument %a only), while the branch that goes back to
; %G or on to %Y is controlled by %div.exity, a compare on the workitem id.
; %uni.user consumes %uni.inc and, by its name, falls under the negative
; directives; %div.alsouser computes the same expression later and is
; expected to be reported divergent.
;
; NOTE(review): block labels are not visible in this view; block placement of
; the two users is inferred from branch targets -- confirm against the full
; file.
117 define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
118 ; CHECK-LABEL: for function 'temporal_uniform_indivloop':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
119 ; CHECK-NOT: DIVERGENT: %uni.
120 ; CHECK-NOT: DIVERGENT: br i1 %uni.
123 %tid = call i32 @llvm.amdgcn.workitem.id.x()
124 %uni.cond = icmp slt i32 %a, 0
; Inner-loop counter: restarts at 0 each time control arrives from %G.
131 %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
132 %uni.inc = add i32 %uni.merge.h, 1
; Inner-loop exit is decided by %uni.cond, not by the workitem id.
133 br i1 %uni.cond, label %X, label %H
; User of the counter that precedes the workitem-id-dependent branch.
136 %uni.user = add i32 %uni.inc, 5
137 %div.exity = icmp slt i32 %tid, 0
138 ; CHECK: DIVERGENT: %div.exity =
; Branch controlled by the workitem id: back to %G or on to %Y.
139 br i1 %div.exity, label %G, label %Y
140 ; CHECK: DIVERGENT: br i1 %div.exity,
; Same expression as %uni.user; presumably in block %Y, after the branch
; above -- TODO confirm.
143 %div.alsouser = add i32 %uni.inc, 5
145 ; CHECK: DIVERGENT: %div.alsouser =
149 ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
;
; Scenario: the counter loop at %H (phi incoming 0 from %entry, %uni.inc from
; %H) is left through a branch on %div.exitx, which is derived from the
; workitem id. The user %div.user is followed by a branch on %uni.cond that
; targets %G or %Y, i.e. the user sits in a second loop headed at %G. The test
; expects %div.user to be reported divergent.
;
; NOTE(review): block labels are not visible in this view; block placement is
; inferred from branch targets -- confirm against the full file.
150 define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
151 ; CHECK-LABEL: for function 'temporal_diverge_loopuser':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
152 ; CHECK-NOT: DIVERGENT: %uni.
153 ; CHECK-NOT: DIVERGENT: br i1 %uni.
156 %tid = call i32 @llvm.amdgcn.workitem.id.x()
157 %uni.cond = icmp slt i32 %a, 0
; Loop-carried counter: 0 on entry, +1 per trip through %H.
161 %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
162 %uni.inc = add i32 %uni.merge.h, 1
163 %div.exitx = icmp slt i32 %tid, 0
164 br i1 %div.exitx, label %X, label %H ; divergent branch
165 ; CHECK: DIVERGENT: %div.exitx =
166 ; CHECK: DIVERGENT: br i1 %div.exitx,
; NOTE(review): the next line is spelled with a space inside the directive
; name, so FileCheck does not treat it as a directive. The effective directive
; for %div.user is the one placed after the branch below. Only one %div.user
; definition is visible in this function, so turning the line below into a
; live directive would demand a second match -- presumably it was disabled on
; purpose; confirm and delete it if so.
172 ; C HECK: DIVERGENT: %div.user =
173 %div.user = add i32 %uni.inc, 5
; Latch of the second loop: back to %G or out to %Y, decided by %uni.cond.
174 br i1 %uni.cond, label %G, label %Y
175 ; CHECK: DIVERGENT: %div.user =
181 ; temporal-divergent use of value carried by divergent loop, user is inside
182 ; sibling loop, defs and use are carried by a uniform loop
;
; Scenario: the counter loop at %H (phi incoming 0 from %G, %uni.inc from %H)
; is left through a branch on %div.exitx, which is derived from the workitem
; id. The user %div.user is followed by a branch on %uni.cond that targets %X
; or %G; the edge to %G re-enters the region containing the counter loop. The
; test expects %div.user to be reported divergent.
;
; NOTE(review): block labels and the terminators of %G and %X are not visible
; in this view; the loop nesting described above is inferred from the phi and
; branch operands -- confirm against the full file.
183 define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
184 ; CHECK-LABEL: for function 'temporal_diverge_loopuser_nested':
; Negative directives: no DIVERGENT report is accepted for %uni.* values or
; for branches on them (applied up to the next positive match).
185 ; CHECK-NOT: DIVERGENT: %uni.
186 ; CHECK-NOT: DIVERGENT: br i1 %uni.
189 %tid = call i32 @llvm.amdgcn.workitem.id.x()
190 %uni.cond = icmp slt i32 %a, 0
; Counter restarts at 0 each time control arrives from %G.
197 %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
198 %uni.inc = add i32 %uni.merge.h, 1
199 %div.exitx = icmp slt i32 %tid, 0
200 br i1 %div.exitx, label %X, label %H ; divergent branch
201 ; CHECK: DIVERGENT: %div.exitx =
202 ; CHECK: DIVERGENT: br i1 %div.exitx,
205 ; CHECK: DIVERGENT: %div.user =
; User of the counter, followed by a branch on %uni.cond to %X or %G.
206 %div.user = add i32 %uni.inc, 5
207 br i1 %uni.cond, label %X, label %G
; Declaration of the intrinsic every kernel in this file calls to obtain %tid;
; it takes no arguments and returns an i32.
213 declare i32 @llvm.amdgcn.workitem.id.x() #0
; Attribute group #0 is referenced by the declaration above and by each
; kernel definition visible in this file.
215 attributes #0 = { nounwind readnone }