1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; else1: plain if/else on %z. %v is read only inside the two branches; the
; phi that merges them takes %v.if and %v.else and does not reference %v.
; In the expected output, v1 (the register read by both branch bodies) is
; marked implicit-def at the end of the %else block.
5 define amdgpu_ps float @else1(i32 %z, float %v) #0 {
7 ; SI: ; %bb.0: ; %main_body
8 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
9 ; SI-NEXT: ; implicit-def: $vgpr0
10 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
11 ; SI-NEXT: s_xor_b32 s0, exec_lo, s0
12 ; SI-NEXT: ; %bb.1: ; %else
13 ; SI-NEXT: v_mul_f32_e32 v0, 0x40400000, v1
14 ; SI-NEXT: ; implicit-def: $vgpr1
15 ; SI-NEXT: ; %bb.2: ; %Flow
16 ; SI-NEXT: s_or_saveexec_b32 s0, s0
17 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s0
18 ; SI-NEXT: ; %bb.3: ; %if
19 ; SI-NEXT: v_add_f32_e32 v0, v1, v1
20 ; SI-NEXT: ; %bb.4: ; %end
21 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s0
22 ; SI-NEXT: ; return to shader part epilog
24 %cc = icmp sgt i32 %z, 5
25 br i1 %cc, label %if, label %else
28 %v.if = fmul float %v, 2.0
32 %v.else = fmul float %v, 3.0
36 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
41 ; %v is still used after the if-else.
; else2: if/else on %z in which %v feeds the %r0 phi (incoming value from
; %else) in addition to being read by both branches, so its value is needed
; where the branches rejoin. The expected output has no implicit-def of
; $vgpr1 after the %else block, and the final v_add_f32 reads v1.
42 define amdgpu_ps float @else2(i32 %z, float %v) #0 {
44 ; SI: ; %bb.0: ; %main_body
45 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
46 ; SI-NEXT: ; implicit-def: $vgpr0
47 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
48 ; SI-NEXT: s_xor_b32 s0, exec_lo, s0
49 ; SI-NEXT: ; %bb.1: ; %else
50 ; SI-NEXT: v_mul_f32_e32 v0, 0x40400000, v1
51 ; SI-NEXT: ; %bb.2: ; %Flow
52 ; SI-NEXT: s_or_saveexec_b32 s0, s0
53 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s0
54 ; SI-NEXT: ; %bb.3: ; %if
55 ; SI-NEXT: v_add_f32_e32 v1, v1, v1
56 ; SI-NEXT: v_mov_b32_e32 v0, v1
57 ; SI-NEXT: ; %bb.4: ; %end
58 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s0
59 ; SI-NEXT: v_add_f32_e32 v0, v1, v0
60 ; SI-NEXT: ; return to shader part epilog
62 %cc = icmp sgt i32 %z, 5
63 br i1 %cc, label %if, label %else
66 %v.if = fmul float %v, 2.0
70 %v.else = fmul float %v, 3.0
74 %r0 = phi float [ %v.if, %if ], [ %v, %else ]
75 %r1 = phi float [ %v.if, %if ], [ %v.else, %else ]
76 %r2 = fadd float %r0, %r1
80 ; An if-else inside a loop: %x can be optimized, but %v cannot.
; else3: the if/else on %z sits in the body of a loop that repeats while
; %inc < %bound. %x is carried around the loop by a phi (%x0 on entry, %xinc
; from %if.end) and is redefined every iteration through %x.endif, whereas %v
; is a function argument that both branches read on every iteration.
81 define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
83 ; SI: ; %bb.0: ; %entry
84 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
85 ; SI-NEXT: s_mov_b32 s1, 0
86 ; SI-NEXT: s_branch BB2_2
87 ; SI-NEXT: BB2_1: ; %if.end
88 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
89 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s2
90 ; SI-NEXT: v_add_nc_u32_e32 v2, 1, v0
91 ; SI-NEXT: s_add_i32 s1, s1, 1
92 ; SI-NEXT: s_cmp_lt_i32 s1, s0
93 ; SI-NEXT: s_cbranch_scc0 BB2_6
94 ; SI-NEXT: BB2_2: ; %for.body
95 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
96 ; SI-NEXT: ; implicit-def: $vgpr0
97 ; SI-NEXT: ; implicit-def: $vgpr3
98 ; SI-NEXT: s_and_saveexec_b32 s2, vcc_lo
99 ; SI-NEXT: s_xor_b32 s2, exec_lo, s2
100 ; SI-NEXT: ; %bb.3: ; %else
101 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
102 ; SI-NEXT: v_mul_lo_u32 v0, v2, 3
103 ; SI-NEXT: v_mul_f32_e32 v3, v1, v2
104 ; SI-NEXT: ; implicit-def: $vgpr2
105 ; SI-NEXT: ; %bb.4: ; %Flow
106 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
107 ; SI-NEXT: s_or_saveexec_b32 s2, s2
108 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s2
109 ; SI-NEXT: s_cbranch_execz BB2_1
110 ; SI-NEXT: ; %bb.5: ; %if
111 ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1
112 ; SI-NEXT: v_mul_f32_e32 v3, s1, v1
113 ; SI-NEXT: v_add_nc_u32_e32 v0, 1, v2
114 ; SI-NEXT: s_branch BB2_1
115 ; SI-NEXT: BB2_6: ; %for.end
116 ; SI-NEXT: v_add_f32_e32 v0, v0, v3
117 ; SI-NEXT: ; return to shader part epilog
119 ; %break = icmp sgt i32 %bound, 0
120 ; br i1 %break, label %for.body, label %for.end
124 %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
125 %x = phi i32 [ %x0, %entry ], [ %xinc, %if.end ]
126 %cc = icmp sgt i32 %z, 5
127 br i1 %cc, label %if, label %else
130 %i.tmp = bitcast i32 %i to float
131 %v.if = fmul float %v, %i.tmp
132 %x.if = add i32 %x, 1
136 %x.tmp = bitcast i32 %x to float
137 %v.else = fmul float %v, %x.tmp
138 %x.else = mul i32 %x, 3
142 %v.endif = phi float [ %v.if, %if ], [ %v.else, %else ]
143 %x.endif = phi i32 [ %x.if, %if ], [ %x.else, %else ]
145 %xinc = add i32 %x.endif, 1
147 %cond = icmp slt i32 %inc, %bound
148 br i1 %cond, label %for.body, label %for.end
151 %x_float = bitcast i32 %x.endif to float
152 %r = fadd float %x_float, %v.endif
156 ; a loop inside an if-else
; loop: each branch makes one indirect amdgpu_gfx call through a
; function-pointer argument, passing %v; the phi merges the two call results
; and %v is not referenced after the calls. The visible IR has no back-edge:
; the loops are the v_readfirstlane / v_cmp_eq / s_cbranch_execnz sequences
; (BB3_2, BB3_6) that surround each s_swappc_b64 in the expected output.
157 define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
159 ; SI: ; %bb.0: ; %main_body
160 ; SI-NEXT: v_mov_b32_e32 v6, v0
161 ; SI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
162 ; SI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
163 ; SI-NEXT: s_mov_b32 s38, -1
164 ; SI-NEXT: v_mov_b32_e32 v0, v1
165 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
166 ; SI-NEXT: s_mov_b32 s39, 0x31c16000
167 ; SI-NEXT: s_add_u32 s36, s36, s1
168 ; SI-NEXT: s_addc_u32 s37, s37, 0
169 ; SI-NEXT: ; implicit-def: $vgpr1
170 ; SI-NEXT: s_mov_b32 s32, 0
171 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
172 ; SI-NEXT: s_xor_b32 s33, exec_lo, s0
173 ; SI-NEXT: s_cbranch_execz BB3_4
174 ; SI-NEXT: ; %bb.1: ; %else
175 ; SI-NEXT: s_mov_b32 s34, exec_lo
176 ; SI-NEXT: BB3_2: ; =>This Inner Loop Header: Depth=1
177 ; SI-NEXT: v_readfirstlane_b32 s4, v4
178 ; SI-NEXT: v_readfirstlane_b32 s5, v5
179 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
180 ; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo
181 ; SI-NEXT: s_mov_b64 s[0:1], s[36:37]
182 ; SI-NEXT: s_mov_b64 s[2:3], s[38:39]
183 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
184 ; SI-NEXT: v_mov_b32_e32 v1, v0
185 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5
186 ; SI-NEXT: ; implicit-def: $vgpr0
187 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35
188 ; SI-NEXT: s_cbranch_execnz BB3_2
190 ; SI-NEXT: s_mov_b32 exec_lo, s34
191 ; SI-NEXT: ; implicit-def: $vgpr0
192 ; SI-NEXT: ; implicit-def: $vgpr2
193 ; SI-NEXT: BB3_4: ; %Flow
194 ; SI-NEXT: s_or_saveexec_b32 s33, s33
195 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s33
196 ; SI-NEXT: s_cbranch_execz BB3_8
197 ; SI-NEXT: ; %bb.5: ; %if
198 ; SI-NEXT: s_mov_b32 s34, exec_lo
199 ; SI-NEXT: BB3_6: ; =>This Inner Loop Header: Depth=1
200 ; SI-NEXT: v_readfirstlane_b32 s4, v2
201 ; SI-NEXT: v_readfirstlane_b32 s5, v3
202 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
203 ; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo
204 ; SI-NEXT: s_mov_b64 s[0:1], s[36:37]
205 ; SI-NEXT: s_mov_b64 s[2:3], s[38:39]
206 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
207 ; SI-NEXT: v_mov_b32_e32 v1, v0
208 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3
209 ; SI-NEXT: ; implicit-def: $vgpr0
210 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35
211 ; SI-NEXT: s_cbranch_execnz BB3_6
213 ; SI-NEXT: s_mov_b32 exec_lo, s34
214 ; SI-NEXT: BB3_8: ; %end
215 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s33
216 ; SI-NEXT: v_mov_b32_e32 v0, v1
217 ; SI-NEXT: ; return to shader part epilog
219 %cc = icmp sgt i32 %z, 5
220 br i1 %cc, label %if, label %else
223 %v.if = call amdgpu_gfx float %extern_func(float %v)
227 %v.else = call amdgpu_gfx float %extern_func2(float %v)
231 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
235 ; A loop inside an if-else, but %v is still in use after the if-else.
; loop_with_use: each branch makes one indirect amdgpu_gfx call through a
; function-pointer argument, passing %v, and %r2 = fadd %r, %v reads %v again
; after the phi. In the expected output %v is copied from v1 into v40 before
; the branch, reloaded into v0 ahead of each s_swappc_b64, and read from v40
; by the final v_add_f32.
236 define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
237 ; SI-LABEL: loop_with_use:
238 ; SI: ; %bb.0: ; %main_body
239 ; SI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
240 ; SI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
241 ; SI-NEXT: s_mov_b32 s38, -1
242 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
243 ; SI-NEXT: v_mov_b32_e32 v40, v1
244 ; SI-NEXT: s_mov_b32 s39, 0x31c16000
245 ; SI-NEXT: s_add_u32 s36, s36, s1
246 ; SI-NEXT: s_addc_u32 s37, s37, 0
247 ; SI-NEXT: ; implicit-def: $vgpr0
248 ; SI-NEXT: s_mov_b32 s32, 0
249 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
250 ; SI-NEXT: s_xor_b32 s33, exec_lo, s0
251 ; SI-NEXT: s_cbranch_execz BB4_4
252 ; SI-NEXT: ; %bb.1: ; %else
253 ; SI-NEXT: s_mov_b32 s34, exec_lo
254 ; SI-NEXT: BB4_2: ; =>This Inner Loop Header: Depth=1
255 ; SI-NEXT: v_readfirstlane_b32 s4, v4
256 ; SI-NEXT: v_readfirstlane_b32 s5, v5
257 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
258 ; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo
259 ; SI-NEXT: v_mov_b32_e32 v0, v40
260 ; SI-NEXT: s_mov_b64 s[0:1], s[36:37]
261 ; SI-NEXT: s_mov_b64 s[2:3], s[38:39]
262 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
263 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5
264 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35
265 ; SI-NEXT: s_cbranch_execnz BB4_2
267 ; SI-NEXT: s_mov_b32 exec_lo, s34
268 ; SI-NEXT: ; implicit-def: $vgpr2
269 ; SI-NEXT: BB4_4: ; %Flow
270 ; SI-NEXT: s_or_saveexec_b32 s33, s33
271 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s33
272 ; SI-NEXT: s_cbranch_execz BB4_8
273 ; SI-NEXT: ; %bb.5: ; %if
274 ; SI-NEXT: s_mov_b32 s34, exec_lo
275 ; SI-NEXT: BB4_6: ; =>This Inner Loop Header: Depth=1
276 ; SI-NEXT: v_readfirstlane_b32 s4, v2
277 ; SI-NEXT: v_readfirstlane_b32 s5, v3
278 ; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
279 ; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo
280 ; SI-NEXT: v_mov_b32_e32 v0, v40
281 ; SI-NEXT: s_mov_b64 s[0:1], s[36:37]
282 ; SI-NEXT: s_mov_b64 s[2:3], s[38:39]
283 ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5]
284 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3
285 ; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35
286 ; SI-NEXT: s_cbranch_execnz BB4_6
288 ; SI-NEXT: s_mov_b32 exec_lo, s34
289 ; SI-NEXT: BB4_8: ; %end
290 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s33
291 ; SI-NEXT: v_add_f32_e32 v0, v0, v40
292 ; SI-NEXT: ; return to shader part epilog
294 %cc = icmp sgt i32 %z, 5
295 br i1 %cc, label %if, label %else
298 %v.if = call amdgpu_gfx float %extern_func(float %v)
302 %v.else = call amdgpu_gfx float %extern_func2(float %v)
306 %r = phi float [ %v.if, %if ], [ %v.else, %else ]
307 %r2 = fadd float %r, %v
311 attributes #0 = { nounwind }