1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; else1: plain if-else on %z > 5. Each arm scales %v by a constant and the two
; results meet in the phi %r. In the IR shown, %v is read only inside the two
; arms, not after them.
5 define amdgpu_ps float @else1(i32 %z, float %v) #0 {
7 ; SI: ; %bb.0: ; %main_body
8 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
9 ; SI-NEXT: ; implicit-def: $vgpr0
10 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
11 ; SI-NEXT: s_xor_b32 s0, exec_lo, s0
12 ; SI-NEXT: s_cbranch_execnz .LBB0_3
13 ; SI-NEXT: ; %bb.1: ; %Flow
14 ; SI-NEXT: s_andn2_saveexec_b32 s0, s0
15 ; SI-NEXT: s_cbranch_execnz .LBB0_4
16 ; SI-NEXT: .LBB0_2: ; %end
17 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s0
18 ; SI-NEXT: s_branch .LBB0_5
19 ; SI-NEXT: .LBB0_3: ; %else
20 ; SI-NEXT: v_mul_f32_e32 v0, 0x40400000, v1
21 ; SI-NEXT: ; implicit-def: $vgpr1
22 ; SI-NEXT: s_andn2_saveexec_b32 s0, s0
23 ; SI-NEXT: s_cbranch_execz .LBB0_2
24 ; SI-NEXT: .LBB0_4: ; %if
25 ; SI-NEXT: v_add_f32_e32 v0, v1, v1
26 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s0
27 ; SI-NEXT: s_branch .LBB0_5
; Branch condition: signed %z > 5 selects %if, otherwise %else.
30 %cc = icmp sgt i32 %z, 5
31 br i1 %cc, label %if, label %else
; %if arm: %v * 2.0.
34 %v.if = fmul float %v, 2.0
; %else arm: %v * 3.0 (0x40400000 in the assertions is 3.0 as an f32 bit pattern).
38 %v.else = fmul float %v, 3.0
; Merge of the two arm results.
42 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
47 ; else2: if-else on %z > 5 where %v is still used after the if-else.
; The phi %r0 takes %v itself (not a value computed from it) on the edge
; coming from %else.
48 define amdgpu_ps float @else2(i32 %z, float %v) #0 {
50 ; SI: ; %bb.0: ; %main_body
51 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
52 ; SI-NEXT: ; implicit-def: $vgpr0
53 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
54 ; SI-NEXT: s_xor_b32 s0, exec_lo, s0
55 ; SI-NEXT: ; %bb.1: ; %else
56 ; SI-NEXT: v_mul_f32_e32 v0, 0x40400000, v1
57 ; SI-NEXT: ; %bb.2: ; %Flow
58 ; SI-NEXT: s_andn2_saveexec_b32 s0, s0
59 ; SI-NEXT: ; %bb.3: ; %if
60 ; SI-NEXT: v_add_f32_e32 v1, v1, v1
61 ; SI-NEXT: v_mov_b32_e32 v0, v1
62 ; SI-NEXT: ; %bb.4: ; %end
63 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s0
64 ; SI-NEXT: v_add_f32_e32 v0, v1, v0
65 ; SI-NEXT: ; return to shader part epilog
; Branch condition: signed %z > 5 selects %if, otherwise %else.
67 %cc = icmp sgt i32 %z, 5
68 br i1 %cc, label %if, label %else
; %if arm: %v * 2.0.
71 %v.if = fmul float %v, 2.0
; %else arm: %v * 3.0.
75 %v.else = fmul float %v, 3.0
; %r0 carries %v.if from %if but the unmodified %v from %else, so %v is still
; needed at the end of the else arm.
79 %r0 = phi float [ %v.if, %if ], [ %v, %else ]
; %r1 is the ordinary merge of the two arm results.
80 %r1 = phi float [ %v.if, %if ], [ %v.else, %else ]
; Sum of the two merged values.
81 %r2 = fadd float %r0, %r1
85 ; else3: if-else inside a loop. %x can be optimized, but %v cannot be
; (presumably this refers to the VGPR live-range optimization enabled on the
; llc command line at the top of the file; confirm).
86 define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
88 ; SI: ; %bb.0: ; %entry
89 ; SI-NEXT: s_mov_b32 s1, 0
90 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
91 ; SI-NEXT: s_branch .LBB2_2
92 ; SI-NEXT: .LBB2_1: ; %if.end
93 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
94 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s2
95 ; SI-NEXT: v_add_nc_u32_e32 v2, 1, v3
96 ; SI-NEXT: s_add_i32 s1, s1, 1
97 ; SI-NEXT: s_cmp_lt_i32 s1, s0
98 ; SI-NEXT: s_cbranch_scc0 .LBB2_6
99 ; SI-NEXT: .LBB2_2: ; %for.body
100 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
101 ; SI-NEXT: ; implicit-def: $vgpr3
102 ; SI-NEXT: ; implicit-def: $vgpr0
103 ; SI-NEXT: s_and_saveexec_b32 s2, vcc_lo
104 ; SI-NEXT: s_xor_b32 s2, exec_lo, s2
105 ; SI-NEXT: ; %bb.3: ; %else
106 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
107 ; SI-NEXT: v_mul_f32_e32 v0, v1, v2
108 ; SI-NEXT: v_lshl_add_u32 v3, v2, 1, v2
109 ; SI-NEXT: ; implicit-def: $vgpr2
110 ; SI-NEXT: ; %bb.4: ; %Flow
111 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
112 ; SI-NEXT: s_andn2_saveexec_b32 s2, s2
113 ; SI-NEXT: s_cbranch_execz .LBB2_1
114 ; SI-NEXT: ; %bb.5: ; %if
115 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
116 ; SI-NEXT: v_mul_f32_e32 v0, s1, v1
117 ; SI-NEXT: v_add_nc_u32_e32 v3, 1, v2
118 ; SI-NEXT: s_branch .LBB2_1
119 ; SI-NEXT: .LBB2_6: ; %for.end
120 ; SI-NEXT: v_add_f32_e32 v0, v3, v0
121 ; SI-NEXT: ; return to shader part epilog
; The next two lines are a disabled (commented-out) guard that would enter the
; loop only when %bound > 0.
123 ; %break = icmp sgt i32 %bound, 0
124 ; br i1 %break, label %for.body, label %for.end
; Loop-carried values: %i starts at 0 and %x starts at %x0; both are updated
; on the edge from %if.end.
128 %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
129 %x = phi i32 [ %x0, %entry ], [ %xinc, %if.end ]
; The branch condition depends only on the argument %z, so it is the same on
; every iteration.
130 %cc = icmp sgt i32 %z, 5
131 br i1 %cc, label %if, label %else
; %if arm: scales %v by the bit pattern of %i and increments %x.
134 %i.tmp = bitcast i32 %i to float
135 %v.if = fmul float %v, %i.tmp
136 %x.if = add i32 %x, 1
; %else arm: scales %v by the bit pattern of %x and triples %x.
140 %x.tmp = bitcast i32 %x to float
141 %v.else = fmul float %v, %x.tmp
142 %x.else = mul i32 %x, 3
; Merge of the two arms.
146 %v.endif = phi float [ %v.if, %if ], [ %v.else, %else ]
147 %x.endif = phi i32 [ %x.if, %if ], [ %x.else, %else ]
; Value fed back into the %x phi for the next iteration.
149 %xinc = add i32 %x.endif, 1
; Keep looping while %inc < %bound (signed compare).
151 %cond = icmp slt i32 %inc, %bound
152 br i1 %cond, label %for.body, label %for.end
; After the loop: add %x.endif (reinterpreted as float) to %v.endif.
155 %x_float = bitcast i32 %x.endif to float
156 %r = fadd float %x_float, %v.endif
160 ; loop: a loop inside an if-else.
; Each arm makes an indirect amdgpu_gfx call through a pointer argument,
; passing %v. The IR shown contains no loop; the loops appear in the generated
; code, where each call sits in a block the assertions mark as an
; "Inner Loop Header".
161 define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
163 ; SI: ; %bb.0: ; %main_body
164 ; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
165 ; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
166 ; SI-NEXT: s_mov_b32 s14, -1
167 ; SI-NEXT: v_mov_b32_e32 v6, v0
168 ; SI-NEXT: v_mov_b32_e32 v0, v1
169 ; SI-NEXT: s_mov_b32 s15, 0x31c16000
170 ; SI-NEXT: s_add_u32 s12, s12, s1
171 ; SI-NEXT: s_addc_u32 s13, s13, 0
172 ; SI-NEXT: s_mov_b32 s32, 0
173 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
174 ; SI-NEXT: ; implicit-def: $vgpr1
175 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
176 ; SI-NEXT: s_xor_b32 s6, exec_lo, s0
177 ; SI-NEXT: s_cbranch_execz .LBB3_4
178 ; SI-NEXT: ; %bb.1: ; %else
179 ; SI-NEXT: s_mov_b32 s7, exec_lo
180 ; SI-NEXT: .LBB3_2: ; =>This Inner Loop Header: Depth=1
181 ; SI-NEXT: v_readfirstlane_b32 s4, v4
182 ; SI-NEXT: v_readfirstlane_b32 s5, v5
183 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
184 ; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo
185 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13]
186 ; SI-NEXT: s_mov_b64 s[2:3], s[14:15]
187 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
188 ; SI-NEXT: v_mov_b32_e32 v1, v0
189 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5
190 ; SI-NEXT: ; implicit-def: $vgpr0
191 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8
192 ; SI-NEXT: s_cbranch_execnz .LBB3_2
194 ; SI-NEXT: s_mov_b32 exec_lo, s7
195 ; SI-NEXT: ; implicit-def: $vgpr0
196 ; SI-NEXT: ; implicit-def: $vgpr2
197 ; SI-NEXT: .LBB3_4: ; %Flow
198 ; SI-NEXT: s_andn2_saveexec_b32 s6, s6
199 ; SI-NEXT: s_cbranch_execz .LBB3_8
200 ; SI-NEXT: ; %bb.5: ; %if
201 ; SI-NEXT: s_mov_b32 s7, exec_lo
202 ; SI-NEXT: .LBB3_6: ; =>This Inner Loop Header: Depth=1
203 ; SI-NEXT: v_readfirstlane_b32 s4, v2
204 ; SI-NEXT: v_readfirstlane_b32 s5, v3
205 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
206 ; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo
207 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13]
208 ; SI-NEXT: s_mov_b64 s[2:3], s[14:15]
209 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
210 ; SI-NEXT: v_mov_b32_e32 v1, v0
211 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3
212 ; SI-NEXT: ; implicit-def: $vgpr0
213 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8
214 ; SI-NEXT: s_cbranch_execnz .LBB3_6
216 ; SI-NEXT: s_mov_b32 exec_lo, s7
217 ; SI-NEXT: .LBB3_8: ; %end
218 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s6
219 ; SI-NEXT: v_mov_b32_e32 v0, v1
220 ; SI-NEXT: ; return to shader part epilog
; Branch condition: signed %z > 5 selects %if, otherwise %else.
222 %cc = icmp sgt i32 %z, 5
223 br i1 %cc, label %if, label %else
; %if arm: indirect call through %extern_func.
226 %v.if = call amdgpu_gfx float %extern_func(float %v)
; %else arm: indirect call through %extern_func2.
230 %v.else = call amdgpu_gfx float %extern_func2(float %v)
; Merge of the two call results; %v is not referenced here.
234 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
238 ; loop_with_use: a loop inside an if-else, but %v is still in use after the if-else.
; The IR shown matches the indirect-call arms of @loop (one call per arm,
; passing %v), except that the merged call result is then added to %v.
239 define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
240 ; SI-LABEL: loop_with_use:
241 ; SI: ; %bb.0: ; %main_body
242 ; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
243 ; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
244 ; SI-NEXT: s_mov_b32 s14, -1
245 ; SI-NEXT: v_mov_b32_e32 v40, v1
246 ; SI-NEXT: s_mov_b32 s15, 0x31c16000
247 ; SI-NEXT: s_add_u32 s12, s12, s1
248 ; SI-NEXT: s_addc_u32 s13, s13, 0
249 ; SI-NEXT: s_mov_b32 s32, 0
250 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
251 ; SI-NEXT: ; implicit-def: $vgpr0
252 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
253 ; SI-NEXT: s_xor_b32 s6, exec_lo, s0
254 ; SI-NEXT: s_cbranch_execz .LBB4_4
255 ; SI-NEXT: ; %bb.1: ; %else
256 ; SI-NEXT: s_mov_b32 s7, exec_lo
257 ; SI-NEXT: .LBB4_2: ; =>This Inner Loop Header: Depth=1
258 ; SI-NEXT: v_readfirstlane_b32 s4, v4
259 ; SI-NEXT: v_readfirstlane_b32 s5, v5
260 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
261 ; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo
262 ; SI-NEXT: v_mov_b32_e32 v0, v40
263 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13]
264 ; SI-NEXT: s_mov_b64 s[2:3], s[14:15]
265 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
266 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5
267 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8
268 ; SI-NEXT: s_cbranch_execnz .LBB4_2
270 ; SI-NEXT: s_mov_b32 exec_lo, s7
271 ; SI-NEXT: ; implicit-def: $vgpr2
272 ; SI-NEXT: .LBB4_4: ; %Flow
273 ; SI-NEXT: s_andn2_saveexec_b32 s6, s6
274 ; SI-NEXT: s_cbranch_execz .LBB4_8
275 ; SI-NEXT: ; %bb.5: ; %if
276 ; SI-NEXT: s_mov_b32 s7, exec_lo
277 ; SI-NEXT: .LBB4_6: ; =>This Inner Loop Header: Depth=1
278 ; SI-NEXT: v_readfirstlane_b32 s4, v2
279 ; SI-NEXT: v_readfirstlane_b32 s5, v3
280 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
281 ; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo
282 ; SI-NEXT: v_mov_b32_e32 v0, v40
283 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13]
284 ; SI-NEXT: s_mov_b64 s[2:3], s[14:15]
285 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
286 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3
287 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8
288 ; SI-NEXT: s_cbranch_execnz .LBB4_6
290 ; SI-NEXT: s_mov_b32 exec_lo, s7
291 ; SI-NEXT: .LBB4_8: ; %end
292 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s6
293 ; SI-NEXT: v_add_f32_e32 v0, v0, v40
294 ; SI-NEXT: ; return to shader part epilog
; Branch condition: signed %z > 5 selects %if, otherwise %else.
296 %cc = icmp sgt i32 %z, 5
297 br i1 %cc, label %if, label %else
; %if arm: indirect call through %extern_func.
300 %v.if = call amdgpu_gfx float %extern_func(float %v)
; %else arm: indirect call through %extern_func2.
304 %v.else = call amdgpu_gfx float %extern_func2(float %v)
; Merge of the two call results.
308 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
; This is the use of %v after the if-else: the assertions keep a copy of the
; incoming v1 in v40 and add it back here.
309 %r2 = fadd float %r, %v
; Attribute group #0 (nounwind), referenced by the function definitions above.
313 attributes #0 = { nounwind }