1 ; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
2 ; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
3 ; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s
9 ; Non-trivial loop unswitching where there are two distinct trivial
10 ; conditions to unswitch within the loop. The conditions are divergent,
11 ; so the loop should not be unswitched.
12 define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
; Expected result for this function: the output keeps both conditional
; branches on %cond1 and %cond2 exactly as written in the input, i.e. no
; entry-block split and no cloned ".us" blocks are expected.
13 ; CHECK-LABEL: @test1(
17 ; CHECK-NEXT: br label %loop_begin
; %cond1 is a function argument, so this branch condition is the same on
; every iteration of the loop.
20 br i1 %cond1, label %loop_a, label %loop_b
22 ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
; The call results are never used; the calls only give each path a
; distinguishable body.
25 %unused.a = call i32 @a()
28 ; CHECK-NEXT: %unused.a = call i32 @a()
29 ; CHECK-NEXT: br label %latch
; Second argument-based branch condition, tested on the %loop_b side.
32 br i1 %cond2, label %loop_b_a, label %loop_b_b
34 ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
37 %unused.b = call i32 @b()
40 ; CHECK-NEXT: %unused.b = call i32 @b()
41 ; CHECK-NEXT: br label %latch
44 %unused.c = call i32 @c()
47 ; CHECK-NEXT: %unused.c = call i32 @c()
48 ; CHECK-NEXT: br label %latch
; The back-edge condition is loaded from %ptr each time this point is
; reached: true branches back to %loop_begin, false leaves via %loop_exit.
51 %v = load i1, ptr %ptr
52 br i1 %v, label %loop_begin, label %loop_exit
54 ; CHECK-NEXT: %v = load i1, ptr %ptr
55 ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
60 ; CHECK-NEXT: ret void
63 ; Non-trivial loop unswitching where there are two distinct trivial
64 ; conditions to unswitch within the loop. The conditions are known to
65 ; be uniform, so the loop should be unswitchable. However, loop unswitching
66 ; currently does not make use of UniformityAnalysis, so nothing is unswitched.
67 define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
; Same loop body as @test1, but the function uses the amdgpu_kernel calling
; convention, so %cond1 and %cond2 are kernel arguments. The expected
; output still keeps both conditional branches in place (no entry-block
; split, no cloned ".us" blocks).
68 ; CHECK-LABEL: @test1_uniform(
72 ; CHECK-NEXT: br label %loop_begin
; First argument-based branch condition; expected to stay where it is.
75 br i1 %cond1, label %loop_a, label %loop_b
77 ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
; The call results are never used; the calls only give each path a
; distinguishable body.
80 %unused.a = call i32 @a()
83 ; CHECK-NEXT: %unused.a = call i32 @a()
84 ; CHECK-NEXT: br label %latch
; Second argument-based branch condition; also expected to stay in place.
87 br i1 %cond2, label %loop_b_a, label %loop_b_b
89 ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
92 %unused.b = call i32 @b()
95 ; CHECK-NEXT: %unused.b = call i32 @b()
96 ; CHECK-NEXT: br label %latch
99 %unused.c = call i32 @c()
102 ; CHECK-NEXT: %unused.c = call i32 @c()
103 ; CHECK-NEXT: br label %latch
; The back-edge condition is loaded from %ptr each time this point is
; reached: true branches back to %loop_begin, false leaves via %loop_exit.
106 %v = load i1, ptr %ptr
107 br i1 %v, label %loop_begin, label %loop_exit
109 ; CHECK-NEXT: %v = load i1, ptr %ptr
110 ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
115 ; CHECK-NEXT: ret void
118 ; Non-trivial loop unswitching where there are two distinct trivial
119 ; conditions to unswitch within the loop. There is no divergence
120 ; because the function is assumed to execute only with a workgroup of size 1.
121 define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
; Same loop body as @test1, but the function carries attribute group #0.
; Expected output: %cond1 is tested once ahead of the loop (choosing between
; %entry.split.us and %entry.split), %cond2 is tested once in %entry.split,
; and three separate copies of the loop follow, one per path
; (@a, @b and @c respectively).
122 ; CHECK-LABEL: @test1_single_lane_execution(
126 ; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
; Input branch on %cond1; in the expected output it appears ahead of the
; loop instead (see the expectation just above).
129 br i1 %cond1, label %loop_a, label %loop_b
134 ; The 'loop_a' unswitched loop.
; In this copy the header branches unconditionally to %loop_a.us.
136 ; CHECK: entry.split.us:
137 ; CHECK-NEXT: br label %loop_begin.us
139 ; CHECK: loop_begin.us:
140 ; CHECK-NEXT: br label %loop_a.us
143 ; CHECK-NEXT: call i32 @a()
144 ; CHECK-NEXT: br label %latch.us
; V captures whatever name the load has in this copy of the loop; the
; variable is redefined for each of the other two copies further down.
147 ; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
148 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
150 ; CHECK: loop_exit.split.us:
151 ; CHECK-NEXT: br label %loop_exit
; Input branch on %cond2; in the expected output it appears in %entry.split.
154 br i1 %cond2, label %loop_b_a, label %loop_b_b
155 ; The second unswitched condition.
157 ; CHECK: entry.split:
158 ; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
163 ; The 'loop_b_a' unswitched loop.
; Block names in this copy carry ".us" plus a numeric suffix where needed
; (%loop_begin.us1, %latch.us2), keeping them distinct from the 'loop_a' copy.
165 ; CHECK: entry.split.split.us:
166 ; CHECK-NEXT: br label %loop_begin.us1
168 ; CHECK: loop_begin.us1:
169 ; CHECK-NEXT: br label %loop_b.us
172 ; CHECK-NEXT: br label %loop_b_a.us
174 ; CHECK: loop_b_a.us:
175 ; CHECK-NEXT: call i32 @b()
176 ; CHECK-NEXT: br label %latch.us2
179 ; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
180 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
182 ; CHECK: loop_exit.split.split.us:
183 ; CHECK-NEXT: br label %loop_exit.split
188 ; The 'loop_b_b' unswitched loop.
; This copy keeps the original block names (%loop_begin, %loop_b, %loop_b_b,
; %latch); both of its formerly conditional branches are now unconditional.
190 ; CHECK: entry.split.split:
191 ; CHECK-NEXT: br label %loop_begin
194 ; CHECK-NEXT: br label %loop_b
197 ; CHECK-NEXT: br label %loop_b_b
200 ; CHECK-NEXT: call i32 @c()
201 ; CHECK-NEXT: br label %latch
204 ; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
205 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
207 ; CHECK: loop_exit.split.split:
208 ; CHECK-NEXT: br label %loop_exit.split
; Input latch: the back-edge condition is loaded from %ptr each time; true
; branches back to %loop_begin, false leaves via %loop_exit.
211 %v = load i1, ptr %ptr
212 br i1 %v, label %loop_begin, label %loop_exit
; Both 'loop_b' copies leave through %loop_exit.split, which then joins
; %loop_exit.
216 ; CHECK: loop_exit.split:
217 ; CHECK-NEXT: br label %loop_exit
; Attribute group #0, referenced by @test1_single_lane_execution. It sets
; "amdgpu-flat-work-group-size" to "1,1" -- presumably the minimum and
; maximum flat workgroup size (TODO confirm against the AMDGPU usage guide).
223 attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }