1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
3 ; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
5 define amdgpu_kernel void @infinite_loop(ptr addrspace(1) %out) {
; NOTE(review): this file looks like a partially-elided copy of an autogenerated
; LLVM test (stray leading line numbers; several labels/terminators missing from
; this view). The check lines are maintained by utils/update_test_checks.py --
; regenerate them rather than hand-editing.
; Purpose: kernel with a single unconditional infinite loop storing 999. The
; amdgpu-unify-divergent-exit-nodes pass gives the CFG a single exit by turning
; the backedge into `br i1 true, %loop, %DummyReturnBlock` (per the checks with
; the "IR" prefix below); the SI-prefixed checks pin the resulting SALU loop
; (`s_cbranch_vccnz` with vcc forced to -1).
6 ; SI-LABEL: infinite_loop:
7 ; SI: ; %bb.0: ; %entry
8 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
9 ; SI-NEXT: s_mov_b32 s3, 0xf000
10 ; SI-NEXT: s_mov_b32 s2, -1
11 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
12 ; SI-NEXT: s_and_b64 vcc, exec, -1
13 ; SI-NEXT: .LBB0_1: ; %loop
14 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
15 ; SI-NEXT: s_waitcnt lgkmcnt(0)
16 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
17 ; SI-NEXT: s_waitcnt vmcnt(0)
18 ; SI-NEXT: s_mov_b64 vcc, vcc
19 ; SI-NEXT: s_cbranch_vccnz .LBB0_1
20 ; SI-NEXT: ; %bb.2: ; %DummyReturnBlock
22 ; IR-LABEL: @infinite_loop(
24 ; IR-NEXT: br label [[LOOP:%.*]]
26 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
27 ; IR-NEXT: br i1 true, label [[LOOP]], label [[DUMMYRETURNBLOCK:%.*]]
28 ; IR: DummyReturnBlock:
; volatile store keeps the loop body from being optimized away (999 = 0x3e7).
35 store volatile i32 999, ptr addrspace(1) %out, align 4
39 define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {
; Purpose: infinite loop that is only entered by lanes whose workitem id
; equals 1; other lanes return immediately. The divergent-exit pass merges
; the loop exit and the plain return into a single UnifiedReturnBlock (per
; the checks below), so codegen can structurize with s_and_saveexec_b64 /
; s_cbranch_execz around the loop.
; NOTE(review): body lines appear elided in this view (labels/terminators
; missing); checks are autogenerated -- do not hand-edit.
40 ; SI-LABEL: infinite_loop_ret:
41 ; SI: ; %bb.0: ; %entry
42 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
43 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
44 ; SI-NEXT: s_cbranch_execz .LBB1_3
45 ; SI-NEXT: ; %bb.1: ; %loop.preheader
46 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
47 ; SI-NEXT: s_mov_b32 s3, 0xf000
48 ; SI-NEXT: s_mov_b32 s2, -1
49 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
50 ; SI-NEXT: s_and_b64 vcc, exec, -1
51 ; SI-NEXT: .LBB1_2: ; %loop
52 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
53 ; SI-NEXT: s_waitcnt lgkmcnt(0)
54 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
55 ; SI-NEXT: s_waitcnt vmcnt(0)
56 ; SI-NEXT: s_mov_b64 vcc, vcc
57 ; SI-NEXT: s_cbranch_vccnz .LBB1_2
58 ; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
60 ; IR-LABEL: @infinite_loop_ret(
62 ; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
63 ; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
64 ; IR-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
66 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
67 ; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]]
68 ; IR: UnifiedReturnBlock:
; divergent guard -- only lanes with workitem id == 1 enter %loop
72 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
73 %cond = icmp eq i32 %tmp, 1
74 br i1 %cond, label %loop, label %return
77 store volatile i32 999, ptr addrspace(1) %out, align 4
84 define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
; Purpose: two separate infinite loops reached through an undef branch; the
; pass reroutes both backedges (`br i1 true, ...` per the checks below) into
; one shared DummyReturnBlock so the kernel has a single exit. Codegen pins
; the structurized form with Flow/Flow2 blocks and two SALU loops (storing
; 888 = 0x378 and 999 = 0x3e7 respectively).
; NOTE(review): body lines appear elided in this view (entry/loop labels and
; terminators missing); checks are autogenerated -- do not hand-edit.
85 ; SI-LABEL: infinite_loops:
86 ; SI: ; %bb.0: ; %entry
87 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
88 ; SI-NEXT: s_mov_b64 s[2:3], -1
89 ; SI-NEXT: s_cbranch_scc1 .LBB2_4
91 ; SI-NEXT: s_mov_b32 s3, 0xf000
92 ; SI-NEXT: s_mov_b32 s2, -1
93 ; SI-NEXT: v_mov_b32_e32 v0, 0x378
94 ; SI-NEXT: s_and_b64 vcc, exec, -1
95 ; SI-NEXT: .LBB2_2: ; %loop2
96 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
97 ; SI-NEXT: s_waitcnt lgkmcnt(0)
98 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
99 ; SI-NEXT: s_waitcnt vmcnt(0)
100 ; SI-NEXT: s_mov_b64 vcc, vcc
101 ; SI-NEXT: s_cbranch_vccnz .LBB2_2
102 ; SI-NEXT: ; %bb.3: ; %Flow
103 ; SI-NEXT: s_mov_b64 s[2:3], 0
104 ; SI-NEXT: .LBB2_4: ; %Flow2
105 ; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
106 ; SI-NEXT: s_waitcnt lgkmcnt(0)
107 ; SI-NEXT: s_mov_b64 vcc, vcc
108 ; SI-NEXT: s_cbranch_vccz .LBB2_7
110 ; SI-NEXT: s_mov_b32 s3, 0xf000
111 ; SI-NEXT: s_mov_b32 s2, -1
112 ; SI-NEXT: s_waitcnt expcnt(0)
113 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
114 ; SI-NEXT: s_and_b64 vcc, exec, 0
115 ; SI-NEXT: .LBB2_6: ; %loop1
116 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
117 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
118 ; SI-NEXT: s_waitcnt vmcnt(0)
119 ; SI-NEXT: s_mov_b64 vcc, vcc
120 ; SI-NEXT: s_cbranch_vccz .LBB2_6
121 ; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
123 ; IR-LABEL: @infinite_loops(
125 ; IR-NEXT: br i1 undef, label [[LOOP1:%.*]], label [[LOOP2:%.*]]
127 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
128 ; IR-NEXT: br i1 true, label [[LOOP1]], label [[DUMMYRETURNBLOCK:%.*]]
130 ; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4
131 ; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]]
132 ; IR: DummyReturnBlock:
; undef selector deliberately leaves the taken loop unknown to the optimizer
136 br i1 undef, label %loop1, label %loop2
139 store volatile i32 999, ptr addrspace(1) %out, align 4
143 store volatile i32 888, ptr addrspace(1) %out, align 4
147 define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; Purpose: nested infinite loops guarded by a divergent condition
; (workitem id != 1 enters; id == 3 controls the inner backedge). The pass
; inserts a TransitionBlock (inner backedge becomes `br i1 true, ...` into it,
; per the checks below) plus a UnifiedReturnBlock, so the divergent exits
; collapse to one; codegen pins the two-deep structurized loop nest with
; exec-mask save/restore (s_and_saveexec_b64 / s_andn2_b64 / s_or_b64).
; NOTE(review): body lines appear elided in this view (outer_loop label,
; terminators, closing brace missing); checks are autogenerated -- do not
; hand-edit.
148 ; SI-LABEL: infinite_loop_nest_ret:
149 ; SI: ; %bb.0: ; %entry
150 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
151 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
152 ; SI-NEXT: s_cbranch_execz .LBB3_5
153 ; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
154 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
155 ; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
156 ; SI-NEXT: s_mov_b32 s7, 0xf000
157 ; SI-NEXT: s_mov_b32 s6, -1
158 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
159 ; SI-NEXT: .LBB3_2: ; %outer_loop
160 ; SI-NEXT: ; =>This Loop Header: Depth=1
161 ; SI-NEXT: ; Child Loop BB3_3 Depth 2
162 ; SI-NEXT: s_mov_b64 s[2:3], 0
163 ; SI-NEXT: .LBB3_3: ; %inner_loop
164 ; SI-NEXT: ; Parent Loop BB3_2 Depth=1
165 ; SI-NEXT: ; => This Inner Loop Header: Depth=2
166 ; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
167 ; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
168 ; SI-NEXT: s_waitcnt lgkmcnt(0)
169 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
170 ; SI-NEXT: s_waitcnt vmcnt(0)
171 ; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
172 ; SI-NEXT: s_cbranch_execnz .LBB3_3
173 ; SI-NEXT: ; %bb.4: ; %loop.exit.guard
174 ; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
175 ; SI-NEXT: s_or_b64 exec, exec, s[2:3]
176 ; SI-NEXT: s_mov_b64 vcc, 0
177 ; SI-NEXT: s_branch .LBB3_2
178 ; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
180 ; IR-LABEL: @infinite_loop_nest_ret(
182 ; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
183 ; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
184 ; IR-NEXT: br i1 [[COND1]], label [[OUTER_LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
186 ; IR-NEXT: br label [[INNER_LOOP:%.*]]
188 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
189 ; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
190 ; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK]]
191 ; IR: TransitionBlock:
192 ; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]]
193 ; IR: UnifiedReturnBlock:
; divergent guard -- lanes with workitem id == 1 skip the loop nest entirely
197 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
198 %cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination
199 br i1 %cond1, label %outer_loop, label %return
202 ; %cond2 = icmp eq i32 %tmp, 2
203 ; br i1 %cond2, label %outer_loop, label %inner_loop
206 inner_loop: ; preds = %LeafBlock, %LeafBlock1
207 store volatile i32 999, ptr addrspace(1) %out, align 4
208 %cond3 = icmp eq i32 %tmp, 3
209 br i1 %cond3, label %inner_loop, label %outer_loop
215 declare i32 @llvm.amdgcn.workitem.id.x()