1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s
; Intrinsic under test: takes an i64 and returns an i1. The RUN lines force
; wavefrontsize64, so the i64 operand is presumably a mask with one bit per
; lane (TODO confirm against the AMDGPU intrinsic documentation).
5 declare i1 @llvm.amdgcn.inverse.ballot.i64(i64)
; Inverse ballot of the constant 0. The i1 result selects between i64 1 and
; i64 0, and the selected value is stored to %out.
; Both selectors are expected to materialize the constant with
; "s_mov_b64 s[0:1], 0" and pass s[0:1] as the condition operand of
; v_cndmask_b32_e64.
8 define amdgpu_cs void @constant_false_inverse_ballot(ptr addrspace(1) %out) {
9 ; GISEL-LABEL: constant_false_inverse_ballot:
10 ; GISEL: ; %bb.0: ; %entry
11 ; GISEL-NEXT: s_mov_b64 s[0:1], 0
12 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
13 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
14 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
16 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
17 ; GISEL-NEXT: s_endpgm
19 ; SDAG-LABEL: constant_false_inverse_ballot:
20 ; SDAG: ; %bb.0: ; %entry
21 ; SDAG-NEXT: s_mov_b32 s2, 0
22 ; SDAG-NEXT: s_mov_b64 s[0:1], 0
23 ; SDAG-NEXT: v_mov_b32_e32 v3, s2
24 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
25 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
27 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
30 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 0)
31 %sel = select i1 %ballot, i64 1, i64 0
32 store i64 %sel, ptr addrspace(1) %out
; Inverse ballot of the all-ones constant u0xFFFFFFFFFFFFFFFF. The i1 result
; selects between i64 1 and i64 0, and the selected value is stored to %out.
; Both selectors are expected to materialize the constant with
; "s_mov_b64 s[0:1], -1" and pass s[0:1] as the condition operand of
; v_cndmask_b32_e64.
38 define amdgpu_cs void @constant_true_inverse_ballot(ptr addrspace(1) %out) {
39 ; GISEL-LABEL: constant_true_inverse_ballot:
40 ; GISEL: ; %bb.0: ; %entry
41 ; GISEL-NEXT: s_mov_b64 s[0:1], -1
42 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
43 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
44 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
46 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
47 ; GISEL-NEXT: s_endpgm
49 ; SDAG-LABEL: constant_true_inverse_ballot:
50 ; SDAG: ; %bb.0: ; %entry
51 ; SDAG-NEXT: s_mov_b32 s2, 0
52 ; SDAG-NEXT: s_mov_b64 s[0:1], -1
53 ; SDAG-NEXT: v_mov_b32_e32 v3, s2
54 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
55 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
57 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
60 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 u0xFFFFFFFFFFFFFFFF)
61 %sel = select i1 %ballot, i64 1, i64 0
62 store i64 %sel, ptr addrspace(1) %out
66 ; Test inverse ballot with a constant mask (u0x0040F8010000)
; The i1 result of the intrinsic call selects between i64 1 and i64 0, and
; the selected value is stored to %out.
; The 64-bit constant is expected to be split across two 32-bit moves:
; the low half 0xf8010000 into s0 and the high half 64 (0x40) into s1.
; s[0:1] is then the condition operand of v_cndmask_b32_e64.
68 define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) {
69 ; GISEL-LABEL: constant_mask_inverse_ballot:
70 ; GISEL: ; %bb.0: ; %entry
71 ; GISEL-NEXT: s_mov_b32 s0, 0xf8010000
72 ; GISEL-NEXT: s_mov_b32 s1, 64
73 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
74 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
75 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
77 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
78 ; GISEL-NEXT: s_endpgm
80 ; SDAG-LABEL: constant_mask_inverse_ballot:
81 ; SDAG: ; %bb.0: ; %entry
82 ; SDAG-NEXT: s_mov_b32 s0, 0xf8010000
83 ; SDAG-NEXT: s_mov_b32 s2, 0
84 ; SDAG-NEXT: s_mov_b32 s1, 64
85 ; SDAG-NEXT: v_mov_b32_e32 v3, s2
86 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
87 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
89 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
92 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 u0x0040F8010000)
93 %sel = select i1 %ballot, i64 1, i64 0
94 store i64 %sel, ptr addrspace(1) %out
98 ; Test inverse ballot using a vgpr as input
; %input is not marked inreg. The expected output reads it from v0/v1 into
; s0/s1 with two v_readfirstlane_b32 instructions before s[0:1] is used as
; the condition operand of v_cndmask_b32_e64. The selected i64 (1 or 0) is
; stored to %out.
100 define amdgpu_cs void @vgpr_inverse_ballot(i64 %input, ptr addrspace(1) %out) {
101 ; GISEL-LABEL: vgpr_inverse_ballot:
102 ; GISEL: ; %bb.0: ; %entry
103 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0
104 ; GISEL-NEXT: v_readfirstlane_b32 s1, v1
105 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
106 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
107 ; GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
108 ; GISEL-NEXT: s_nop 0
109 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
110 ; GISEL-NEXT: s_endpgm
112 ; SDAG-LABEL: vgpr_inverse_ballot:
113 ; SDAG: ; %bb.0: ; %entry
114 ; SDAG-NEXT: v_readfirstlane_b32 s0, v0
115 ; SDAG-NEXT: v_readfirstlane_b32 s1, v1
116 ; SDAG-NEXT: s_mov_b32 s2, 0
117 ; SDAG-NEXT: v_mov_b32_e32 v1, s2
118 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
119 ; SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
121 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
122 ; SDAG-NEXT: s_endpgm
124 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 %input)
125 %sel = select i1 %ballot, i64 1, i64 0
126 store i64 %sel, ptr addrspace(1) %out
; Test inverse ballot using an inreg (SGPR) argument as input.
; The expected output uses the incoming s[0:1] directly as the condition
; operand of v_cndmask_b32_e64, with no copy or readfirstlane first. The
; selected i64 (1 or 0) is stored to %out.
130 define amdgpu_cs void @sgpr_inverse_ballot(i64 inreg %input, ptr addrspace(1) %out) {
131 ; GISEL-LABEL: sgpr_inverse_ballot:
132 ; GISEL: ; %bb.0: ; %entry
133 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
134 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
135 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
136 ; GISEL-NEXT: s_nop 0
137 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
138 ; GISEL-NEXT: s_endpgm
140 ; SDAG-LABEL: sgpr_inverse_ballot:
141 ; SDAG: ; %bb.0: ; %entry
142 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
143 ; SDAG-NEXT: s_mov_b32 s0, 0
144 ; SDAG-NEXT: s_waitcnt_depctr 0xfffe
145 ; SDAG-NEXT: v_mov_b32_e32 v3, s0
146 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
148 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
149 ; SDAG-NEXT: s_endpgm
151 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 %input)
152 %sel = select i1 %ballot, i64 1, i64 0
153 store i64 %sel, ptr addrspace(1) %out
157 ; Test inverse ballot after phi
; Both i64 arguments are inreg. The entry block branches on %s2 != 0; the
; %if block adds 1 to %s0_1; a phi merges the original and incremented
; values and feeds the intrinsic call.
; The expected output keeps the whole computation scalar: s_cmp_lg_u64 plus
; s_cbranch_scc1 for the branch, s_add_u32/s_addc_u32 for the add, and then
; s[0:1] as the condition operand of v_cndmask_b32_e64.
158 define amdgpu_cs void @phi_uniform(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace(1) %out) {
159 ; GISEL-LABEL: phi_uniform:
160 ; GISEL: ; %bb.0: ; %entry
161 ; GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
162 ; GISEL-NEXT: s_cbranch_scc1 .LBB5_2
163 ; GISEL-NEXT: ; %bb.1: ; %if
164 ; GISEL-NEXT: s_add_u32 s0, s0, 1
165 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
166 ; GISEL-NEXT: .LBB5_2: ; %endif
167 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
168 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
169 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
170 ; GISEL-NEXT: s_nop 0
171 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
172 ; GISEL-NEXT: s_endpgm
174 ; SDAG-LABEL: phi_uniform:
175 ; SDAG: ; %bb.0: ; %entry
176 ; SDAG-NEXT: s_cmp_lg_u64 s[2:3], 0
177 ; SDAG-NEXT: s_cbranch_scc1 .LBB5_2
178 ; SDAG-NEXT: ; %bb.1: ; %if
179 ; SDAG-NEXT: s_add_u32 s0, s0, 1
180 ; SDAG-NEXT: s_addc_u32 s1, s1, 0
181 ; SDAG-NEXT: .LBB5_2: ; %endif
182 ; SDAG-NEXT: s_mov_b32 s2, 0
183 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
184 ; SDAG-NEXT: v_mov_b32_e32 v3, s2
185 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
187 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
188 ; SDAG-NEXT: s_endpgm
190 %cc = icmp ne i64 %s2, 0
191 br i1 %cc, label %endif, label %if
194 %tmp = add i64 %s0_1, 1
198 %input = phi i64 [ %s0_1, %entry ], [ %tmp, %if ]
200 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 %input)
201 %sel = select i1 %ballot, i64 1, i64 0
202 store i64 %sel, ptr addrspace(1) %out
207 ; GISel implementation is currently incorrect.
208 ; The change in the branch affects all lanes, not just the branching ones.
209 ; This test will be fixed once GISel correctly takes uniformity analysis into account.
; The intrinsic result on %s2 is used directly as a branch condition. The
; %if block adds 1 to %s0_1, and a phi of the original and incremented
; values is stored to %out.
; In the SelectionDAG checks the result is copied to v[2:3] once in the entry
; block and again inside the exec-masked %if block (between s_and_saveexec_b64
; and the s_or_b64 that restores exec). In the GlobalISel checks the only
; copy to v[2:3] comes after exec is restored, following the scalar add.
210 define amdgpu_cs void @inverse_ballot_branch(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace(1) %out) {
211 ; GISEL-LABEL: inverse_ballot_branch:
212 ; GISEL: ; %bb.0: ; %entry
213 ; GISEL-NEXT: s_xor_b64 s[4:5], s[2:3], -1
214 ; GISEL-NEXT: s_and_saveexec_b64 s[2:3], s[4:5]
215 ; GISEL-NEXT: ; %bb.1: ; %if
216 ; GISEL-NEXT: s_add_u32 s0, s0, 1
217 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
218 ; GISEL-NEXT: ; %bb.2: ; %endif
219 ; GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
220 ; GISEL-NEXT: v_mov_b32_e32 v3, s1
221 ; GISEL-NEXT: v_mov_b32_e32 v2, s0
222 ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
223 ; GISEL-NEXT: s_nop 0
224 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
225 ; GISEL-NEXT: s_endpgm
227 ; SDAG-LABEL: inverse_ballot_branch:
228 ; SDAG: ; %bb.0: ; %entry
229 ; SDAG-NEXT: v_mov_b32_e32 v3, s1
230 ; SDAG-NEXT: v_mov_b32_e32 v2, s0
231 ; SDAG-NEXT: s_xor_b64 s[4:5], s[2:3], -1
232 ; SDAG-NEXT: s_and_saveexec_b64 s[2:3], s[4:5]
233 ; SDAG-NEXT: ; %bb.1: ; %if
234 ; SDAG-NEXT: s_add_u32 s0, s0, 1
235 ; SDAG-NEXT: s_addc_u32 s1, s1, 0
236 ; SDAG-NEXT: v_mov_b32_e32 v3, s1
237 ; SDAG-NEXT: v_mov_b32_e32 v2, s0
238 ; SDAG-NEXT: ; %bb.2: ; %endif
239 ; SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
240 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
242 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
243 ; SDAG-NEXT: s_endpgm
245 %ballot = call i1 @llvm.amdgcn.inverse.ballot.i64(i64 %s2)
246 br i1 %ballot, label %endif, label %if
249 %tmp = add i64 %s0_1, 1
253 %sel = phi i64 [ %s0_1, %entry ], [ %tmp, %if ]
254 store i64 %sel, ptr addrspace(1) %out