1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
; readfirstlane on an i1 function argument; the result is stored to %out as a
; byte. Both selectors are expected to emit v_readfirstlane_b32 from v2 and
; then mask the scalar result with 1 before the store.
5 define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
6 ; CHECK-SDAG-LABEL: test_readfirstlane_i1:
7 ; CHECK-SDAG: ; %bb.0:
8 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
10 ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
11 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
12 ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
13 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
14 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
16 ; CHECK-GISEL-LABEL: test_readfirstlane_i1:
17 ; CHECK-GISEL: ; %bb.0:
18 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
20 ; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
21 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
22 ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
23 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
24 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
25 %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
26 store i1 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane on an i1 argument marked inreg. The expected code for both
; selectors masks s16 with 1 directly and contains no v_readfirstlane_b32.
30 define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) {
31 ; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
32 ; CHECK-SDAG: ; %bb.0:
33 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; CHECK-SDAG-NEXT: s_and_b32 s4, s16, 1
35 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
36 ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
37 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
38 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
40 ; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
41 ; CHECK-GISEL: ; %bb.0:
42 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; CHECK-GISEL-NEXT: s_and_b32 s4, s16, 1
44 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
45 ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
46 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
47 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
48 %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
49 store i1 %readfirstlane, ptr addrspace(1) %out, align 4
; The i1 operand comes from an icmp and the readfirstlane result drives a
; select. Expected: the compare result is materialized with v_cndmask, read
; with v_readfirstlane_b32, and converted back into a vcc condition
; (s_bitcmp1/s_cselect for SelectionDAG, s_and/v_cmp_ne for GlobalISel).
53 define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
54 ; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
55 ; CHECK-SDAG: ; %bb.0:
56 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57 ; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
58 ; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
59 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4
60 ; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0
61 ; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
62 ; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
63 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
64 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
65 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
67 ; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
68 ; CHECK-GISEL: ; %bb.0:
69 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70 ; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
71 ; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
72 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4
73 ; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4
74 ; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
75 ; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
76 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
77 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
78 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
79 %cmp = icmp ugt i32 %src, 42
80 %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
81 %sel = select i1 %readfirstlane, i32 %src, i32 %src1
82 store i32 %sel, ptr addrspace(1) %out, align 4
; The i1 operand is loaded from %in. Expected for both selectors: a
; flat_load_ubyte, v_readfirstlane_b32 on the loaded value, a mask with 1,
; and a byte store of the result to %out.
86 define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) {
87 ; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
88 ; CHECK-SDAG: ; %bb.0:
89 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; CHECK-SDAG-NEXT: flat_load_ubyte v2, v[2:3]
91 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
92 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
93 ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
94 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
95 ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
96 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
97 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
99 ; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
100 ; CHECK-GISEL: ; %bb.0:
101 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; CHECK-GISEL-NEXT: flat_load_ubyte v2, v[2:3]
103 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
104 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
105 ; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
106 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
107 ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
108 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
109 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
110 %load = load i1, ptr addrspace(1) %in
111 %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load)
112 store i1 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane on an i32 function argument, result stored to %out. Expected
; for both selectors: a single v_readfirstlane_b32 from v2, then a dword store.
116 define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
117 ; CHECK-SDAG-LABEL: test_readfirstlane_i32:
118 ; CHECK-SDAG: ; %bb.0:
119 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
121 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
122 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
123 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
124 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
126 ; CHECK-GISEL-LABEL: test_readfirstlane_i32:
127 ; CHECK-GISEL: ; %bb.0:
128 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
130 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
131 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
132 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
133 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
134 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src)
135 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane on an i64 function argument, result stored to %out. Expected:
; two v_readfirstlane_b32 instructions, one per 32-bit half (v2 and v3). The
; two selectors differ only in instruction order and SGPR assignment.
139 define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
140 ; CHECK-SDAG-LABEL: test_readfirstlane_i64:
141 ; CHECK-SDAG: ; %bb.0:
142 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
144 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
145 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
146 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
147 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
148 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
149 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
151 ; CHECK-GISEL-LABEL: test_readfirstlane_i64:
152 ; CHECK-GISEL: ; %bb.0:
153 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
155 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
156 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
157 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
158 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
159 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
160 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
161 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src)
162 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane on a double function argument, result stored to %out. The
; expected instruction sequences are the same as for the i64 argument test:
; two v_readfirstlane_b32 (v2 and v3) followed by a dwordx2 store.
166 define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
167 ; CHECK-SDAG-LABEL: test_readfirstlane_f64:
168 ; CHECK-SDAG: ; %bb.0:
169 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
171 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
172 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
173 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
174 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
175 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
176 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
178 ; CHECK-GISEL-LABEL: test_readfirstlane_f64:
179 ; CHECK-GISEL: ; %bb.0:
180 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
182 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
183 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
184 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
185 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
186 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
187 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
188 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src)
189 store double %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane of the constant i32 32, consumed by inline asm through an "s"
; constraint. Expected for both selectors: only s_mov_b32 s0, 32 before the
; asm block, with no v_readfirstlane_b32.
193 define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
194 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
195 ; CHECK-SDAG: ; %bb.0:
196 ; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
197 ; CHECK-SDAG-NEXT: ;;#ASMSTART
198 ; CHECK-SDAG-NEXT: ; use s0
199 ; CHECK-SDAG-NEXT: ;;#ASMEND
200 ; CHECK-SDAG-NEXT: s_endpgm
202 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32:
203 ; CHECK-GISEL: ; %bb.0:
204 ; CHECK-GISEL-NEXT: s_mov_b32 s0, 32
205 ; CHECK-GISEL-NEXT: ;;#ASMSTART
206 ; CHECK-GISEL-NEXT: ; use s0
207 ; CHECK-GISEL-NEXT: ;;#ASMEND
208 ; CHECK-GISEL-NEXT: s_endpgm
209 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
210 call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
; readfirstlane of the constant i64 32, consumed by inline asm through an "s"
; constraint. Expected for both selectors: only s_mov_b64 s[0:1], 32 before
; the asm block, with no v_readfirstlane_b32.
214 define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
215 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
216 ; CHECK-SDAG: ; %bb.0:
217 ; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
218 ; CHECK-SDAG-NEXT: ;;#ASMSTART
219 ; CHECK-SDAG-NEXT: ; use s[0:1]
220 ; CHECK-SDAG-NEXT: ;;#ASMEND
221 ; CHECK-SDAG-NEXT: s_endpgm
223 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64:
224 ; CHECK-GISEL: ; %bb.0:
225 ; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 32
226 ; CHECK-GISEL-NEXT: ;;#ASMSTART
227 ; CHECK-GISEL-NEXT: ; use s[0:1]
228 ; CHECK-GISEL-NEXT: ;;#ASMEND
229 ; CHECK-GISEL-NEXT: s_endpgm
230 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
231 call void asm sideeffect "; use $0", "s"(i64 %readfirstlane)
; readfirstlane of the constant double 32.0, consumed by inline asm through
; an "s" constraint. Expected for both selectors: two s_mov_b32 writes (s0 = 0,
; s1 = 0x40400000) and no v_readfirstlane_b32.
235 define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
236 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
237 ; CHECK-SDAG: ; %bb.0:
238 ; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
239 ; CHECK-SDAG-NEXT: s_mov_b32 s1, 0x40400000
240 ; CHECK-SDAG-NEXT: ;;#ASMSTART
241 ; CHECK-SDAG-NEXT: ; use s[0:1]
242 ; CHECK-SDAG-NEXT: ;;#ASMEND
243 ; CHECK-SDAG-NEXT: s_endpgm
245 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
246 ; CHECK-GISEL: ; %bb.0:
247 ; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
248 ; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
249 ; CHECK-GISEL-NEXT: ;;#ASMSTART
250 ; CHECK-GISEL-NEXT: ; use s[0:1]
251 ; CHECK-GISEL-NEXT: ;;#ASMEND
252 ; CHECK-GISEL-NEXT: s_endpgm
253 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
254 call void asm sideeffect "; use $0", "s"(double %readfirstlane)
; readfirstlane of the constant i32 32, stored to %out. Expected for both
; selectors: the stored value is produced by v_mov_b32 v2, 32 and there is no
; v_readfirstlane_b32.
258 define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
259 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
260 ; CHECK-SDAG: ; %bb.0:
261 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
262 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, 32
263 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
264 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
265 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
266 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
267 ; CHECK-SDAG-NEXT: s_endpgm
269 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32:
270 ; CHECK-GISEL: ; %bb.0:
271 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
272 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, 32
273 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
274 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
275 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
276 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
277 ; CHECK-GISEL-NEXT: s_endpgm
278 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
279 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane of the constant i64 32, stored to %out. Neither selector is
; expected to emit v_readfirstlane_b32. SelectionDAG moves the immediates
; straight into v0/v1, while GlobalISel first materializes the constant in
; s[2:3] and then copies it to v0/v1.
283 define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
284 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
285 ; CHECK-SDAG: ; %bb.0:
286 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
287 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 32
288 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0
289 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
290 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
291 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
292 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
293 ; CHECK-SDAG-NEXT: s_endpgm
295 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
296 ; CHECK-GISEL: ; %bb.0:
297 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
298 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
299 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
300 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
301 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
302 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
303 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
304 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
305 ; CHECK-GISEL-NEXT: s_endpgm
306 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
307 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; readfirstlane of the constant double 32.0, stored to %out. Neither selector
; is expected to emit v_readfirstlane_b32. SelectionDAG moves the two 32-bit
; halves (0 and 0x40400000) straight into v0/v1, while GlobalISel first puts
; them in s2/s3 and then copies them to v0/v1.
311 define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
312 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
313 ; CHECK-SDAG: ; %bb.0:
314 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
315 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0
316 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000
317 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
318 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
319 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
320 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
321 ; CHECK-SDAG-NEXT: s_endpgm
323 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
324 ; CHECK-GISEL: ; %bb.0:
325 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
326 ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
327 ; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
328 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
329 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
330 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
331 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
332 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
333 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
334 ; CHECK-GISEL-NEXT: s_endpgm
335 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
336 store double %readfirstlane, ptr addrspace(1) %out, align 4
; The readfirstlane operand is produced by inline asm with an "={m0}" output
; constraint. Expected for both selectors: the value is copied with
; v_mov_b32 v2, m0 and stored, with no v_readfirstlane_b32.
; NOTE(review): the call uses the unsuffixed intrinsic name rather than the
; .i32 spelling used by most tests in this file - presumably intentional;
; confirm before normalizing.
340 define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
341 ; CHECK-SDAG-LABEL: test_readfirstlane_m0:
342 ; CHECK-SDAG: ; %bb.0:
343 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
344 ; CHECK-SDAG-NEXT: ;;#ASMSTART
345 ; CHECK-SDAG-NEXT: s_mov_b32 m0, -1
346 ; CHECK-SDAG-NEXT: ;;#ASMEND
347 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, m0
348 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
349 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
350 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
351 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
352 ; CHECK-SDAG-NEXT: s_endpgm
354 ; CHECK-GISEL-LABEL: test_readfirstlane_m0:
355 ; CHECK-GISEL: ; %bb.0:
356 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
357 ; CHECK-GISEL-NEXT: ;;#ASMSTART
358 ; CHECK-GISEL-NEXT: s_mov_b32 m0, -1
359 ; CHECK-GISEL-NEXT: ;;#ASMEND
360 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, m0
361 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
362 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
363 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
364 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
365 ; CHECK-GISEL-NEXT: s_endpgm
366 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
367 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
368 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; The i32 readfirstlane operand is produced by inline asm with an "=s" output
; constraint. Expected for both selectors: s2 is copied to v2 and stored, with
; no v_readfirstlane_b32.
372 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
373 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
374 ; CHECK-SDAG: ; %bb.0:
375 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
376 ; CHECK-SDAG-NEXT: ;;#ASMSTART
377 ; CHECK-SDAG-NEXT: s_mov_b32 s2, 0
378 ; CHECK-SDAG-NEXT: ;;#ASMEND
379 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
380 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
381 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
382 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
383 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
384 ; CHECK-SDAG-NEXT: s_endpgm
386 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32:
387 ; CHECK-GISEL: ; %bb.0:
388 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
389 ; CHECK-GISEL-NEXT: ;;#ASMSTART
390 ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
391 ; CHECK-GISEL-NEXT: ;;#ASMEND
392 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s2
393 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
394 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
395 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
396 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
397 ; CHECK-GISEL-NEXT: s_endpgm
398 %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
399 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr)
400 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; The i64 readfirstlane operand is produced by inline asm with an "=s" output
; constraint. Expected for both selectors: s[2:3] is copied to v[0:1] and
; stored, with no v_readfirstlane_b32.
404 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
405 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
406 ; CHECK-SDAG: ; %bb.0:
407 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
408 ; CHECK-SDAG-NEXT: ;;#ASMSTART
409 ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
410 ; CHECK-SDAG-NEXT: ;;#ASMEND
411 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
412 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
413 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
414 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
415 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
416 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
417 ; CHECK-SDAG-NEXT: s_endpgm
419 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
420 ; CHECK-GISEL: ; %bb.0:
421 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
422 ; CHECK-GISEL-NEXT: ;;#ASMSTART
423 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
424 ; CHECK-GISEL-NEXT: ;;#ASMEND
425 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
426 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
427 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
428 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
429 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
430 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
431 ; CHECK-GISEL-NEXT: s_endpgm
432 %sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"()
433 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr)
434 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; The double readfirstlane operand is produced by inline asm with an "=s"
; output constraint. Expected for both selectors: s[2:3] is copied to v[0:1]
; and stored, with no v_readfirstlane_b32.
438 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
439 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
440 ; CHECK-SDAG: ; %bb.0:
441 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
442 ; CHECK-SDAG-NEXT: ;;#ASMSTART
443 ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
444 ; CHECK-SDAG-NEXT: ;;#ASMEND
445 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
446 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
447 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
448 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
449 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
450 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
451 ; CHECK-SDAG-NEXT: s_endpgm
453 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
454 ; CHECK-GISEL: ; %bb.0:
455 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
456 ; CHECK-GISEL-NEXT: ;;#ASMSTART
457 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
458 ; CHECK-GISEL-NEXT: ;;#ASMEND
459 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
460 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
461 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
462 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
463 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
464 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
465 ; CHECK-GISEL-NEXT: s_endpgm
466 %sgpr = call double asm "s_mov_b64 $0, 0", "=s"()
467 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr)
468 store double %readfirstlane, ptr addrspace(1) %out, align 4
; The readfirstlane operand is the ptrtoint of an addrspace(5) alloca, and the
; result is consumed by inline asm through an "s" constraint. Expected for
; both selectors: the value reaches the asm as s_mov_b32 s4, 0 with no
; v_readfirstlane_b32.
; NOTE(review): the call uses the unsuffixed intrinsic name rather than the
; .i32 spelling used by most tests in this file - presumably intentional;
; confirm before normalizing.
472 define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
473 ; CHECK-SDAG-LABEL: test_readfirstlane_fi:
474 ; CHECK-SDAG: ; %bb.0:
475 ; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s17
476 ; CHECK-SDAG-NEXT: s_addc_u32 s1, s1, 0
477 ; CHECK-SDAG-NEXT: s_mov_b32 s4, 0
478 ; CHECK-SDAG-NEXT: ;;#ASMSTART
479 ; CHECK-SDAG-NEXT: ; use s4
480 ; CHECK-SDAG-NEXT: ;;#ASMEND
481 ; CHECK-SDAG-NEXT: s_endpgm
483 ; CHECK-GISEL-LABEL: test_readfirstlane_fi:
484 ; CHECK-GISEL: ; %bb.0:
485 ; CHECK-GISEL-NEXT: s_add_u32 s0, s0, s17
486 ; CHECK-GISEL-NEXT: s_addc_u32 s1, s1, 0
487 ; CHECK-GISEL-NEXT: s_mov_b32 s4, 0
488 ; CHECK-GISEL-NEXT: ;;#ASMSTART
489 ; CHECK-GISEL-NEXT: ; use s4
490 ; CHECK-GISEL-NEXT: ;;#ASMEND
491 ; CHECK-GISEL-NEXT: s_endpgm
492 %alloca = alloca i32, addrspace(5)
493 %int = ptrtoint ptr addrspace(5) %alloca to i32
494 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
495 call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
; readfirstlane on a half function argument, result consumed by inline asm
; through an "s" constraint. Expected for both selectors: a single
; v_readfirstlane_b32 s4, v2 with no extra masking.
499 define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) {
500 ; CHECK-SDAG-LABEL: test_readfirstlane_half:
501 ; CHECK-SDAG: ; %bb.0:
502 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
504 ; CHECK-SDAG-NEXT: ;;#ASMSTART
505 ; CHECK-SDAG-NEXT: ; use s4
506 ; CHECK-SDAG-NEXT: ;;#ASMEND
507 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
509 ; CHECK-GISEL-LABEL: test_readfirstlane_half:
510 ; CHECK-GISEL: ; %bb.0:
511 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
513 ; CHECK-GISEL-NEXT: ;;#ASMSTART
514 ; CHECK-GISEL-NEXT: ; use s4
515 ; CHECK-GISEL-NEXT: ;;#ASMEND
516 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
517 %x = call half @llvm.amdgcn.readfirstlane.f16(half %src)
518 call void asm sideeffect "; use $0", "s"(half %x)
; readfirstlane on a float function argument, result consumed by inline asm
; through an "s" constraint. Expected for both selectors: a single
; v_readfirstlane_b32 s4, v2.
522 define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) {
523 ; CHECK-SDAG-LABEL: test_readfirstlane_float:
524 ; CHECK-SDAG: ; %bb.0:
525 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
527 ; CHECK-SDAG-NEXT: ;;#ASMSTART
528 ; CHECK-SDAG-NEXT: ; use s4
529 ; CHECK-SDAG-NEXT: ;;#ASMEND
530 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
532 ; CHECK-GISEL-LABEL: test_readfirstlane_float:
533 ; CHECK-GISEL: ; %bb.0:
534 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
536 ; CHECK-GISEL-NEXT: ;;#ASMSTART
537 ; CHECK-GISEL-NEXT: ; use s4
538 ; CHECK-GISEL-NEXT: ;;#ASMEND
539 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
540 %x = call float @llvm.amdgcn.readfirstlane.f32(float %src)
541 call void asm sideeffect "; use $0", "s"(float %x)
; readfirstlane on a bfloat function argument, result consumed by inline asm
; through an "s" constraint. Expected for both selectors: a single
; v_readfirstlane_b32 s4, v2 with no extra masking.
545 define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) {
546 ; CHECK-SDAG-LABEL: test_readfirstlane_bfloat:
547 ; CHECK-SDAG: ; %bb.0:
548 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
549 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
550 ; CHECK-SDAG-NEXT: ;;#ASMSTART
551 ; CHECK-SDAG-NEXT: ; use s4
552 ; CHECK-SDAG-NEXT: ;;#ASMEND
553 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
555 ; CHECK-GISEL-LABEL: test_readfirstlane_bfloat:
556 ; CHECK-GISEL: ; %bb.0:
557 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
559 ; CHECK-GISEL-NEXT: ;;#ASMSTART
560 ; CHECK-GISEL-NEXT: ; use s4
561 ; CHECK-GISEL-NEXT: ;;#ASMEND
562 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
563 %x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src)
564 call void asm sideeffect "; use $0", "s"(bfloat %x)
; readfirstlane on an i16 function argument, result consumed by inline asm
; through an "s" constraint. Both selectors are expected to emit
; v_readfirstlane_b32 s4, v2; only the SelectionDAG output additionally masks
; the result with 0xffff before the asm use.
568 define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
569 ; CHECK-SDAG-LABEL: test_readfirstlane_i16:
570 ; CHECK-SDAG: ; %bb.0:
571 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
573 ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
574 ; CHECK-SDAG-NEXT: ;;#ASMSTART
575 ; CHECK-SDAG-NEXT: ; use s4
576 ; CHECK-SDAG-NEXT: ;;#ASMEND
577 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
579 ; CHECK-GISEL-LABEL: test_readfirstlane_i16:
580 ; CHECK-GISEL: ; %bb.0:
581 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
583 ; CHECK-GISEL-NEXT: ;;#ASMSTART
584 ; CHECK-GISEL-NEXT: ; use s4
585 ; CHECK-GISEL-NEXT: ;;#ASMEND
586 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
587 %x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src)
588 call void asm sideeffect "; use $0", "s"(i16 %x)
; readfirstlane on a <2 x half> function argument, result consumed by inline
; asm through an "s" constraint. Expected for both selectors: a single
; v_readfirstlane_b32 s4, v2 covering the whole vector.
592 define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) {
593 ; CHECK-SDAG-LABEL: test_readfirstlane_v2f16:
594 ; CHECK-SDAG: ; %bb.0:
595 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
597 ; CHECK-SDAG-NEXT: ;;#ASMSTART
598 ; CHECK-SDAG-NEXT: ; use s4
599 ; CHECK-SDAG-NEXT: ;;#ASMEND
600 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
602 ; CHECK-GISEL-LABEL: test_readfirstlane_v2f16:
603 ; CHECK-GISEL: ; %bb.0:
604 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
606 ; CHECK-GISEL-NEXT: ;;#ASMSTART
607 ; CHECK-GISEL-NEXT: ; use s4
608 ; CHECK-GISEL-NEXT: ;;#ASMEND
609 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
610 %x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
611 call void asm sideeffect "; use $0", "s"(<2 x half> %x)
; readfirstlane on a <2 x float> function argument, result consumed by inline
; asm through an "s" constraint. Expected: two v_readfirstlane_b32 (v2 -> s4,
; v3 -> s5); the selectors differ only in the order they are emitted.
615 define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) {
616 ; CHECK-SDAG-LABEL: test_readfirstlane_v2f32:
617 ; CHECK-SDAG: ; %bb.0:
618 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
620 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
621 ; CHECK-SDAG-NEXT: ;;#ASMSTART
622 ; CHECK-SDAG-NEXT: ; use s[4:5]
623 ; CHECK-SDAG-NEXT: ;;#ASMEND
624 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
626 ; CHECK-GISEL-LABEL: test_readfirstlane_v2f32:
627 ; CHECK-GISEL: ; %bb.0:
628 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
630 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
631 ; CHECK-GISEL-NEXT: ;;#ASMSTART
632 ; CHECK-GISEL-NEXT: ; use s[4:5]
633 ; CHECK-GISEL-NEXT: ;;#ASMEND
634 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
635 %x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src)
636 call void asm sideeffect "; use $0", "s"(<2 x float> %x)
; readfirstlane on a <7 x i32> function argument, result consumed by inline
; asm through an "s" constraint. Expected: seven v_readfirstlane_b32
; (v2..v8 -> s4..s10); SelectionDAG emits them from the highest register down,
; GlobalISel from the lowest up.
640 define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) {
641 ; CHECK-SDAG-LABEL: test_readfirstlane_v7i32:
642 ; CHECK-SDAG: ; %bb.0:
643 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8
645 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7
646 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6
647 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
648 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
649 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
650 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
651 ; CHECK-SDAG-NEXT: ;;#ASMSTART
652 ; CHECK-SDAG-NEXT: ; use s[4:10]
653 ; CHECK-SDAG-NEXT: ;;#ASMEND
654 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
656 ; CHECK-GISEL-LABEL: test_readfirstlane_v7i32:
657 ; CHECK-GISEL: ; %bb.0:
658 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
660 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
661 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
662 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
663 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
664 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
665 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
666 ; CHECK-GISEL-NEXT: ;;#ASMSTART
667 ; CHECK-GISEL-NEXT: ; use s[4:10]
668 ; CHECK-GISEL-NEXT: ;;#ASMEND
669 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
670 %x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src)
671 call void asm sideeffect "; use $0", "s"(<7 x i32> %x)
; readfirstlane on an <8 x i16> function argument, result consumed by inline
; asm through an "s" constraint. Expected: four v_readfirstlane_b32
; (v2..v5 -> s4..s7); SelectionDAG emits them from the highest register down,
; GlobalISel from the lowest up.
675 define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
676 ; CHECK-SDAG-LABEL: test_readfirstlane_v8i16:
677 ; CHECK-SDAG: ; %bb.0:
678 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
680 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
681 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
682 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
683 ; CHECK-SDAG-NEXT: ;;#ASMSTART
684 ; CHECK-SDAG-NEXT: ; use s[4:7]
685 ; CHECK-SDAG-NEXT: ;;#ASMEND
686 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
688 ; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
689 ; CHECK-GISEL: ; %bb.0:
690 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
692 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
693 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
694 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
695 ; CHECK-GISEL-NEXT: ;;#ASMSTART
696 ; CHECK-GISEL-NEXT: ; use s[4:7]
697 ; CHECK-GISEL-NEXT: ;;#ASMEND
698 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
699 %x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src)
700 call void asm sideeffect "; use $0", "s"(<8 x i16> %x)