# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
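
# These tests cover lowering of divergent i1 phis by the GlobalISel
# amdgpu-global-isel-divergence-lowering pass: each divergent i1 G_PHI is
# expected to become an sreg_32 lane-mask PHI, merged across predecessors
# where needed with S_ANDN2_B32/S_AND_B32/S_OR_B32 against $exec_lo
# (see the GFX10 check lines below).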

--- |
  define void @divergent_i1_phi_uniform_branch() {ret void}
  define void @divergent_i1_phi_uniform_branch_simple() {ret void}
  define void @divergent_i1_phi_used_inside_loop() {ret void}
  define void @divergent_i1_phi_used_inside_loop_bigger_loop_body() {ret void}
  define void @_amdgpu_cs_main() #0 {ret void}

  attributes #0 = {"amdgpu-flat-work-group-size"="1,1"}
...
---
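# Divergent i1 phi (bb.2) whose predecessors are reached only over uniform
# branches: the phi is expected to become an sreg_32 PHI of copied lane
# masks, with no exec-masked merge required.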
name: divergent_i1_phi_uniform_branch
legalized: true
tracksRegLiveness: true
body: |
  ; GFX10-LABEL: name: divergent_i1_phi_uniform_branch
  ; GFX10: bb.0:
  ; GFX10-NEXT:   successors: %bb.1(0x30000000), %bb.2(0x50000000)
  ; GFX10-NEXT:   liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
  ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
  ; GFX10-NEXT:   G_BRCOND [[ICMP1]](s1), %bb.2
  ; GFX10-NEXT:   G_BR %bb.1
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 123
  ; GFX10-NEXT:   G_STORE [[C2]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
  ; GFX10-NEXT:   G_BR %bb.3
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT:   successors: %bb.4(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %20(s1), %bb.3
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
  ; GFX10-NEXT:   G_BR %bb.4
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.3:
  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C3]]
  ; GFX10-NEXT:   [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
  ; GFX10-NEXT:   G_BR %bb.2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.4:
  ; GFX10-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; GFX10-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]]
  ; GFX10-NEXT:   G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
  ; GFX10-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4

    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
    %3:_(s32) = COPY $vgpr2
    %4:_(s32) = COPY $sgpr0
    %5:_(s32) = COPY $vgpr3
    %6:_(s32) = COPY $vgpr4
    %7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32)
    %8:_(s32) = G_CONSTANT i32 6
    %9:_(s1) = G_ICMP intpred(uge), %3(s32), %8
    %10:_(s32) = G_CONSTANT i32 0
    %11:_(s1) = G_ICMP intpred(ne), %4(s32), %10
    G_BRCOND %11(s1), %bb.2
    G_BR %bb.1

  bb.1:
    successors: %bb.3(0x80000000)

    %12:_(s32) = G_CONSTANT i32 123
    G_STORE %12(s32), %7(p1) :: (store (s32), addrspace 1)
    G_BR %bb.3

  bb.2:
    successors: %bb.4(0x80000000)

    %13:_(s1) = G_PHI %14(s1), %bb.3, %9(s1), %bb.0
    G_BR %bb.4

  bb.3:
    successors: %bb.2(0x80000000)

    %15:_(s32) = G_CONSTANT i32 1
    %14:_(s1) = G_ICMP intpred(ult), %3(s32), %15
    G_BR %bb.2

  bb.4:
    %16:_(s32) = G_CONSTANT i32 2
    %17:_(s32) = G_CONSTANT i32 1
    %18:_(s32) = G_SELECT %13(s1), %17, %16
    G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1)
    S_ENDPGM 0
...
---
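# Simplified variant: the divergent i1 phi in bb.2 joins values from bb.0 and
# bb.1. The incoming lane mask is expected to be updated in bb.1 with
# exec-masked S_ANDN2_B32/S_AND_B32/S_OR_B32 before the join.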
name: divergent_i1_phi_uniform_branch_simple
legalized: true
tracksRegLiveness: true
body: |
  ; GFX10-LABEL: name: divergent_i1_phi_uniform_branch_simple
  ; GFX10: bb.0:
  ; GFX10-NEXT:   successors: %bb.1(0x30000000), %bb.2(0x50000000)
  ; GFX10-NEXT:   liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
  ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
  ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1)
  ; GFX10-NEXT:   G_BRCOND [[ICMP1]](s1), %bb.2
  ; GFX10-NEXT:   G_BR %bb.1
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
  ; GFX10-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
  ; GFX10-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc
  ; GFX10-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
  ; GFX10-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; GFX10-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]]
  ; GFX10-NEXT:   G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
  ; GFX10-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2

    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
    %3:_(s32) = COPY $vgpr2
    %4:_(s32) = COPY $sgpr0
    %5:_(s32) = G_CONSTANT i32 6
    %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5
    %7:_(s32) = G_CONSTANT i32 0
    %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7
    G_BRCOND %8(s1), %bb.2
    G_BR %bb.1

  bb.1:
    successors: %bb.2(0x80000000)

    %9:_(s32) = G_CONSTANT i32 1
    %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9

  bb.2:
    %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1
    %12:_(s32) = G_CONSTANT i32 2
    %13:_(s32) = G_CONSTANT i32 1
    %14:_(s32) = G_SELECT %11(s1), %13, %12
    G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1)
    S_ENDPGM 0
...
---
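# Divergent i1 phi carried around a loop and used after the loop: the value
# is flipped with G_XOR each iteration, and the merged lane mask produced by
# S_OR_B32 in the loop is expected to feed the G_SELECT after SI_LOOP exits.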
name: divergent_i1_phi_used_inside_loop
legalized: true
tracksRegLiveness: true
body: |
  ; GFX10-LABEL: name: divergent_i1_phi_used_inside_loop
  ; GFX10: bb.0:
  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
  ; GFX10-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
  ; GFX10-NEXT:   [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
  ; GFX10-NEXT:   [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
  ; GFX10-NEXT:   [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
  ; GFX10-NEXT:   [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
  ; GFX10-NEXT:   [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
  ; GFX10-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
  ; GFX10-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
  ; GFX10-NEXT:   [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
  ; GFX10-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
  ; GFX10-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
  ; GFX10-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
  ; GFX10-NEXT:   SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10-NEXT:   G_BR %bb.2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT:   [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
  ; GFX10-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
  ; GFX10-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; GFX10-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; GFX10-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
  ; GFX10-NEXT:   G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
  ; GFX10-NEXT:   SI_RETURN
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
    %4:_(s1) = G_CONSTANT i1 true
    %5:_(s32) = G_CONSTANT i32 0

  bb.1:
    successors: %bb.2(0x04000000), %bb.1(0x7c000000)

    %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
    %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
    %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
    %12:_(s1) = G_CONSTANT i1 true
    %11:_(s1) = G_XOR %10, %12
    %13:_(s32) = G_UITOFP %8(s32)
    %14:_(s1) = G_FCMP floatpred(ogt), %13(s32), %0
    %15:_(s32) = G_CONSTANT i32 1
    %9:_(s32) = G_ADD %8, %15
    %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32)
    SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
    G_BR %bb.2

  bb.2:
    %16:_(s1) = G_PHI %11(s1), %bb.1
    %17:_(s32) = G_PHI %7(s32), %bb.1
    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
    %18:_(s32) = G_FCONSTANT float 0.000000e+00
    %19:_(s32) = G_FCONSTANT float 1.000000e+00
    %20:_(s32) = G_SELECT %16(s1), %19, %18
    G_STORE %20(s32), %3(p0) :: (store (s32))
    SI_RETURN
...
---
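# Same loop-carried divergent i1 phi, but with extra uniform control flow and
# stores inside the loop body; the lane-mask update is still expected in the
# latch (bb.5), with the merged value consumed in the exit block (bb.6).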
name: divergent_i1_phi_used_inside_loop_bigger_loop_body
legalized: true
tracksRegLiveness: true
body: |
  ; GFX10-LABEL: name: divergent_i1_phi_used_inside_loop_bigger_loop_body
  ; GFX10: bb.0:
  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
  ; GFX10-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT:   [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; GFX10-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
  ; GFX10-NEXT:   [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5
  ; GFX10-NEXT:   [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5
  ; GFX10-NEXT:   [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
  ; GFX10-NEXT:   [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
  ; GFX10-NEXT:   [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
  ; GFX10-NEXT:   [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
  ; GFX10-NEXT:   [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
  ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]]
  ; GFX10-NEXT:   G_BRCOND [[ICMP]](s1), %bb.4
  ; GFX10-NEXT:   G_BR %bb.2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1
  ; GFX10-NEXT:   [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
  ; GFX10-NEXT:   G_BRCOND [[XOR]](s1), %bb.5
  ; GFX10-NEXT:   G_BR %bb.3
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.3:
  ; GFX10-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
  ; GFX10-NEXT:   G_STORE [[C5]](s32), [[MV1]](p0) :: (store (s32))
  ; GFX10-NEXT:   G_BR %bb.5
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.4:
  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
  ; GFX10-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
  ; GFX10-NEXT:   G_STORE [[C7]](s32), [[MV2]](p0) :: (store (s32))
  ; GFX10-NEXT:   G_BR %bb.2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.5:
  ; GFX10-NEXT:   successors: %bb.6(0x04000000), %bb.1(0x7c000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]]
  ; GFX10-NEXT:   [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
  ; GFX10-NEXT:   [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
  ; GFX10-NEXT:   [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
  ; GFX10-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
  ; GFX10-NEXT:   [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
  ; GFX10-NEXT:   [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
  ; GFX10-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
  ; GFX10-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
  ; GFX10-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
  ; GFX10-NEXT:   SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10-NEXT:   G_BR %bb.6
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.6:
  ; GFX10-NEXT:   [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
  ; GFX10-NEXT:   [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
  ; GFX10-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
  ; GFX10-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; GFX10-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; GFX10-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]]
  ; GFX10-NEXT:   G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
  ; GFX10-NEXT:   SI_RETURN
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7

    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
    %5:_(s32) = COPY $vgpr4
    %6:_(s32) = COPY $vgpr5
    %7:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
    %8:_(s32) = COPY $vgpr6
    %9:_(s32) = COPY $vgpr7
    %10:_(p0) = G_MERGE_VALUES %8(s32), %9(s32)
    %11:_(s32) = G_CONSTANT i32 0
    %12:_(s32) = G_FCONSTANT float 1.000000e+00
    %13:_(s1) = G_FCMP floatpred(ogt), %1(s32), %12

  bb.1:
    successors: %bb.4(0x40000000), %bb.2(0x40000000)

    %14:_(s32) = G_PHI %15(s32), %bb.5, %11(s32), %bb.0
    %16:_(s32) = G_PHI %11(s32), %bb.0, %17(s32), %bb.5
    %18:_(s1) = G_PHI %13(s1), %bb.0, %19(s1), %bb.5
    %20:_(s1) = G_CONSTANT i1 true
    %21:_(s32) = G_CONSTANT i32 1000
    %22:_(s1) = G_ICMP intpred(sle), %16(s32), %21
    G_BRCOND %22(s1), %bb.4
    G_BR %bb.2

  bb.2:
    successors: %bb.3(0x40000000), %bb.5(0x40000000)

    %23:_(s1) = G_PHI %24(s1), %bb.4, %20(s1), %bb.1
    %25:_(s1) = G_CONSTANT i1 true
    %26:_(s1) = G_XOR %23, %25
    G_BRCOND %26(s1), %bb.5
    G_BR %bb.3

  bb.3:
    successors: %bb.5(0x80000000)

    %27:_(s32) = G_CONSTANT i32 1000
    G_STORE %27(s32), %7(p0) :: (store (s32))
    G_BR %bb.5

  bb.4:
    successors: %bb.2(0x80000000)

    %24:_(s1) = G_CONSTANT i1 false
    %28:_(s32) = G_CONSTANT i32 1000
    G_STORE %28(s32), %10(p0) :: (store (s32))
    G_BR %bb.2

  bb.5:
    successors: %bb.6(0x04000000), %bb.1(0x7c000000)

    %29:_(s1) = G_CONSTANT i1 true
    %19:_(s1) = G_XOR %18, %29
    %30:_(s32) = G_UITOFP %16(s32)
    %31:_(s1) = G_FCMP floatpred(ogt), %30(s32), %0
    %32:_(s32) = G_CONSTANT i32 1
    %17:_(s32) = G_ADD %16, %32
    %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %31(s1), %14(s32)
    SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
    G_BR %bb.6

  bb.6:
    %33:_(s1) = G_PHI %19(s1), %bb.5
    %34:_(s32) = G_PHI %15(s32), %bb.5
    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
    %35:_(s32) = G_FCONSTANT float 0.000000e+00
    %36:_(s32) = G_FCONSTANT float 1.000000e+00
    %37:_(s32) = G_SELECT %33(s1), %36, %35
    G_STORE %37(s32), %4(p0) :: (store (s32))
    SI_RETURN
...
---
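# Compute-shader entry point (amdgpu-flat-work-group-size=1,1) with an inner
# loop feeding a buffer store; the divergent i1 phi in bb.2 is expected to be
# rewritten as an sreg_32 PHI of the copied constant lane masks.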
name: _amdgpu_cs_main
legalized: true
tracksRegLiveness: true
body: |
  ; GFX10-LABEL: name: _amdgpu_cs_main
  ; GFX10: bb.0:
  ; GFX10-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GFX10-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10-NEXT:   [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
  ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296
  ; GFX10-NEXT:   [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C]]
  ; GFX10-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
  ; GFX10-NEXT:   [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[ZEXT]]
  ; GFX10-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](s64)
  ; GFX10-NEXT:   [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x s32>))
  ; GFX10-NEXT:   [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
  ; GFX10-NEXT:   [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[BITCAST]](s256)
  ; GFX10-NEXT:   [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](s128)
  ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
  ; GFX10-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](s32), [[C1]](s32)
  ; GFX10-NEXT:   [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](s32), [[INT1]](s32)
  ; GFX10-NEXT:   [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]]
  ; GFX10-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; GFX10-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FREEZE]], [[C3]](s32)
  ; GFX10-NEXT:   [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
  ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](s32), [[C1]]
  ; GFX10-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX10-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C4]]
  ; GFX10-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32)
  ; GFX10-NEXT:   [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; GFX10-NEXT:   [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]]
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
  ; GFX10-NEXT:   G_BRCOND [[XOR]](s1), %bb.2
  ; GFX10-NEXT:   G_BR %bb.1
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   G_BR %bb.3
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT:   successors: %bb.5(0x40000000), %bb.6(0x40000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4
  ; GFX10-NEXT:   [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
  ; GFX10-NEXT:   G_BRCOND [[COPY4]](s1), %bb.5
  ; GFX10-NEXT:   G_BR %bb.6
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.3:
  ; GFX10-NEXT:   successors: %bb.4(0x04000000), %bb.3(0x7c000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI2:%[0-9]+]]:_(s32) = G_PHI %34(s32), %bb.3, [[C6]](s32), %bb.1
  ; GFX10-NEXT:   [[PHI3:%[0-9]+]]:_(s32) = G_PHI %36(s32), %bb.3, [[FREEZE]](s32), %bb.1
  ; GFX10-NEXT:   [[PHI4:%[0-9]+]]:_(s32) = G_PHI %38(s32), %bb.3, [[C6]](s32), %bb.1
  ; GFX10-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
  ; GFX10-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]]
  ; GFX10-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
  ; GFX10-NEXT:   [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C8]]
  ; GFX10-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; GFX10-NEXT:   [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
  ; GFX10-NEXT:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[C7]]
  ; GFX10-NEXT:   G_BRCOND [[ICMP1]](s1), %bb.3
  ; GFX10-NEXT:   G_BR %bb.4
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.4:
  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.3
  ; GFX10-NEXT:   [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
  ; GFX10-NEXT:   [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]]
  ; GFX10-NEXT:   [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]]
  ; GFX10-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1)
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1)
  ; GFX10-NEXT:   G_BR %bb.2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.5:
  ; GFX10-NEXT:   successors: %bb.6(0x80000000)
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
  ; GFX10-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; GFX10-NEXT:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]]
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: bb.6:
  ; GFX10-NEXT:   [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5
  ; GFX10-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
  ; GFX10-NEXT:   [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]]
  ; GFX10-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; GFX10-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD3]], [[C12]](s32)
  ; GFX10-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10-NEXT:   G_AMDGPU_BUFFER_STORE [[PHI6]](s32), [[UV1]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (s32), align 1, addrspace 8)
  ; GFX10-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2

    %0:_(s32) = COPY $sgpr0
    %1:_(s32) = COPY $sgpr1
    %2:_(s32) = COPY $vgpr0
    %3:_(s32) = G_IMPLICIT_DEF
    %4:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
    %5:_(s64) = G_CONSTANT i64 -4294967296
    %6:_(s64) = G_AND %4, %5
    %7:_(s64) = G_ZEXT %0(s32)
    %8:_(s64) = G_OR %6, %7
    %9:_(p4) = G_INTTOPTR %8(s64)
    %10:_(<8 x s32>) = G_LOAD %9(p4) :: (load (<8 x s32>))
    %11:_(s256) = G_BITCAST %10(<8 x s32>)
    %12:_(s128) = G_TRUNC %11(s256)
    %13:_(<4 x s32>) = G_BITCAST %12(s128)
    %15:_(s32) = G_CONSTANT i32 0
    %14:_(s32) = G_CONSTANT i32 -1
    %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %14(s32), %15(s32)
    %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %14(s32), %16(s32)
    %18:_(s32) = G_FREEZE %17
    %19:_(s32) = G_CONSTANT i32 2
    %20:_(s32) = G_SHL %18, %19(s32)
    %21:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %15(s32), %20, %15, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
    %22:_(s1) = G_ICMP intpred(eq), %21(s32), %15
    %23:_(s32) = G_CONSTANT i32 1
    %24:_(s32) = G_AND %18, %23
    %25:_(s1) = G_TRUNC %24(s32)
    %26:_(s1) = G_CONSTANT i1 true
    %27:_(s1) = G_XOR %25, %26
    G_BRCOND %27(s1), %bb.2
    G_BR %bb.1

  bb.1:
    successors: %bb.3(0x80000000)

    %28:_(s32) = G_CONSTANT i32 0
    G_BR %bb.3

  bb.2:
    successors: %bb.5(0x40000000), %bb.6(0x40000000)

    %29:_(s32) = G_PHI %30(s32), %bb.4, %3(s32), %bb.0
    %31:_(s1) = G_PHI %32(s1), %bb.4, %26(s1), %bb.0
    G_BRCOND %31(s1), %bb.5
    G_BR %bb.6

  bb.3:
    successors: %bb.4(0x04000000), %bb.3(0x7c000000)

    %33:_(s32) = G_PHI %34(s32), %bb.3, %28(s32), %bb.1
    %35:_(s32) = G_PHI %36(s32), %bb.3, %18(s32), %bb.1
    %37:_(s32) = G_PHI %38(s32), %bb.3, %28(s32), %bb.1
    %39:_(s32) = G_CONSTANT i32 0
    %40:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %39(s32), %33, %39, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
    %38:_(s32) = G_ADD %40, %37
    %41:_(s32) = G_CONSTANT i32 -1
    %36:_(s32) = G_ADD %35, %41
    %42:_(s32) = G_CONSTANT i32 4
    %34:_(s32) = G_ADD %33, %42
    %43:_(s1) = G_ICMP intpred(ne), %36(s32), %39
    G_BRCOND %43(s1), %bb.3
    G_BR %bb.4

  bb.4:
    successors: %bb.2(0x80000000)

    %44:_(s32) = G_PHI %38(s32), %bb.3
    %32:_(s1) = G_CONSTANT i1 false
    %45:_(s1) = G_ICMP intpred(eq), %44(s32), %21
    %46:_(s1) = G_OR %22, %45
    %30:_(s32) = G_ZEXT %46(s1)
    G_BR %bb.2

  bb.5:
    successors: %bb.6(0x80000000)

    %47:_(s32) = G_ZEXT %22(s1)
    %48:_(s32) = G_CONSTANT i32 2
    %49:_(s32) = G_OR %47, %48

  bb.6:
    %50:_(s32) = G_PHI %29(s32), %bb.2, %49(s32), %bb.5
    %51:_(<4 x s32>), %52:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
    %53:_(s32) = G_ADD %2, %1
    %54:_(s32) = G_CONSTANT i32 2
    %55:_(s32) = G_SHL %53, %54(s32)
    %56:_(s32) = G_CONSTANT i32 0
    G_AMDGPU_BUFFER_STORE %50(s32), %52(<4 x s32>), %56(s32), %55, %56, 0, 0, 0 :: (store (s32), align 1, addrspace 8)
    S_ENDPGM 0
...