1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
; Explicit declarations for three readfirstlane overloads (i32, i64, double),
; all tagged with attribute group #0.
; NOTE(review): the i32 declaration uses the unsuffixed intrinsic name while
; most i32 call sites below use the ".i32" suffix, and the other overloads
; called in this file (f16, f32, bf16, i16, vector forms) have no declaration
; in the visible portion -- presumably this relies on the IR parser's intrinsic
; auto-upgrade / implicit declaration; confirm before changing.
5 declare i32 @llvm.amdgcn.readfirstlane(i32) #0
6 declare i64 @llvm.amdgcn.readfirstlane.i64(i64) #0
7 declare double @llvm.amdgcn.readfirstlane.f64(double) #0
; Calls the readfirstlane intrinsic on the i32 argument %src and stores the
; result to %out. Both RUN configurations expect one v_readfirstlane_b32 from
; v2 into s4, a v_mov back into a VGPR, and a flat_store_dword.
9 define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
10 ; CHECK-SDAG-LABEL: test_readfirstlane_i32:
11 ; CHECK-SDAG: ; %bb.0:
12 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
14 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
15 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
16 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
17 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
19 ; CHECK-GISEL-LABEL: test_readfirstlane_i32:
20 ; CHECK-GISEL: ; %bb.0:
21 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
23 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
24 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
25 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
26 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
27 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src)
28 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; Calls the i64 readfirstlane overload on %src and stores the result to %out.
; The expected code uses two v_readfirstlane_b32 instructions, one per 32-bit
; half (v2 and v3). The two RUN configurations differ in the order of the two
; reads and in which SGPR (s4/s5) receives which half.
32 define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
33 ; CHECK-SDAG-LABEL: test_readfirstlane_i64:
34 ; CHECK-SDAG: ; %bb.0:
35 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
37 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
38 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
39 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
40 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
41 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
42 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
44 ; CHECK-GISEL-LABEL: test_readfirstlane_i64:
45 ; CHECK-GISEL: ; %bb.0:
46 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
48 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
49 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
50 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
51 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
52 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
53 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
54 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src)
55 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; Calls the double readfirstlane overload on %src and stores the result to
; %out. The expected instruction sequences are the same as for the i64 test:
; two v_readfirstlane_b32 reads (v2, v3) followed by a flat_store_dwordx2.
59 define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
60 ; CHECK-SDAG-LABEL: test_readfirstlane_f64:
61 ; CHECK-SDAG: ; %bb.0:
62 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
64 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
65 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
66 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
67 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
68 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
69 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
71 ; CHECK-GISEL-LABEL: test_readfirstlane_f64:
72 ; CHECK-GISEL: ; %bb.0:
73 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
75 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
76 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
77 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
78 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
79 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
80 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
81 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src)
82 store double %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel that applies readfirstlane to the constant i32 32 and feeds the result
; to inline asm through an "s" (SGPR) constraint. The expected output contains
; no v_readfirstlane_b32: the constant is materialized with s_mov_b32 s0, 32.
86 define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1 {
87 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
88 ; CHECK-SDAG: ; %bb.0:
89 ; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
90 ; CHECK-SDAG-NEXT: ;;#ASMSTART
91 ; CHECK-SDAG-NEXT: ; use s0
92 ; CHECK-SDAG-NEXT: ;;#ASMEND
93 ; CHECK-SDAG-NEXT: s_endpgm
95 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32:
96 ; CHECK-GISEL: ; %bb.0:
97 ; CHECK-GISEL-NEXT: s_mov_b32 s0, 32
98 ; CHECK-GISEL-NEXT: ;;#ASMSTART
99 ; CHECK-GISEL-NEXT: ; use s0
100 ; CHECK-GISEL-NEXT: ;;#ASMEND
101 ; CHECK-GISEL-NEXT: s_endpgm
102 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
103 call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
; Kernel that applies readfirstlane to the constant i64 32 and feeds the result
; to inline asm through an "s" constraint. The expected output contains no
; v_readfirstlane_b32: the constant is materialized with s_mov_b64 s[0:1], 32.
107 define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1 {
108 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
109 ; CHECK-SDAG: ; %bb.0:
110 ; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
111 ; CHECK-SDAG-NEXT: ;;#ASMSTART
112 ; CHECK-SDAG-NEXT: ; use s[0:1]
113 ; CHECK-SDAG-NEXT: ;;#ASMEND
114 ; CHECK-SDAG-NEXT: s_endpgm
116 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64:
117 ; CHECK-GISEL: ; %bb.0:
118 ; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 32
119 ; CHECK-GISEL-NEXT: ;;#ASMSTART
120 ; CHECK-GISEL-NEXT: ; use s[0:1]
121 ; CHECK-GISEL-NEXT: ;;#ASMEND
122 ; CHECK-GISEL-NEXT: s_endpgm
123 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
124 call void asm sideeffect "; use $0", "s"(i64 %readfirstlane)
; Kernel that applies readfirstlane to the constant double 32.0 and feeds the
; result to inline asm through an "s" constraint. The expected output contains
; no v_readfirstlane_b32: the constant is built from two s_mov_b32 writes
; (s0 = 0, s1 = 0x40400000) forming the pair s[0:1].
128 define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1 {
129 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
130 ; CHECK-SDAG: ; %bb.0:
131 ; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
132 ; CHECK-SDAG-NEXT: s_mov_b32 s1, 0x40400000
133 ; CHECK-SDAG-NEXT: ;;#ASMSTART
134 ; CHECK-SDAG-NEXT: ; use s[0:1]
135 ; CHECK-SDAG-NEXT: ;;#ASMEND
136 ; CHECK-SDAG-NEXT: s_endpgm
138 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
139 ; CHECK-GISEL: ; %bb.0:
140 ; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
141 ; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
142 ; CHECK-GISEL-NEXT: ;;#ASMSTART
143 ; CHECK-GISEL-NEXT: ; use s[0:1]
144 ; CHECK-GISEL-NEXT: ;;#ASMEND
145 ; CHECK-GISEL-NEXT: s_endpgm
146 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
147 call void asm sideeffect "; use $0", "s"(double %readfirstlane)
; Kernel that applies readfirstlane to the constant i32 32 and stores the
; result to %out. The expected output contains no v_readfirstlane_b32: the
; constant is moved directly into a VGPR (v_mov_b32 v2, 32) and stored.
151 define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) #1 {
152 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
153 ; CHECK-SDAG: ; %bb.0:
154 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
155 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, 32
156 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
157 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
158 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
159 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
160 ; CHECK-SDAG-NEXT: s_endpgm
162 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32:
163 ; CHECK-GISEL: ; %bb.0:
164 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
165 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, 32
166 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
167 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
168 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
169 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
170 ; CHECK-GISEL-NEXT: s_endpgm
171 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
172 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel that applies readfirstlane to the constant i64 32 and stores the
; result to %out. Neither expected sequence contains v_readfirstlane_b32.
; The run without -global-isel moves the constant halves straight into VGPRs;
; the -global-isel run first builds it in s[2:3] and then copies to VGPRs.
176 define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) #1 {
177 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
178 ; CHECK-SDAG: ; %bb.0:
179 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
180 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 32
181 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0
182 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
183 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
184 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
185 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
186 ; CHECK-SDAG-NEXT: s_endpgm
188 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
189 ; CHECK-GISEL: ; %bb.0:
190 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
191 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
192 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
193 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
194 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
195 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
196 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
197 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
198 ; CHECK-GISEL-NEXT: s_endpgm
199 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
200 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel that applies readfirstlane to the constant double 32.0 and stores the
; result to %out. Neither expected sequence contains v_readfirstlane_b32.
; The run without -global-isel moves 0 / 0x40400000 straight into VGPRs; the
; -global-isel run first places them in s2 / s3 and then copies to VGPRs.
204 define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) #1 {
205 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
206 ; CHECK-SDAG: ; %bb.0:
207 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
208 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0
209 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000
210 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
211 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
212 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
213 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
214 ; CHECK-SDAG-NEXT: s_endpgm
216 ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
217 ; CHECK-GISEL: ; %bb.0:
218 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
219 ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
220 ; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
221 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
222 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
223 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
224 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
225 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
226 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
227 ; CHECK-GISEL-NEXT: s_endpgm
228 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
229 store double %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel whose readfirstlane operand is produced by inline asm that writes m0
; (output constraint "={m0}"). The result is stored to %out. The expected
; output has no v_readfirstlane_b32; m0 is copied to a VGPR with a plain
; v_mov_b32 before the store.
233 define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
234 ; CHECK-SDAG-LABEL: test_readfirstlane_m0:
235 ; CHECK-SDAG: ; %bb.0:
236 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
237 ; CHECK-SDAG-NEXT: ;;#ASMSTART
238 ; CHECK-SDAG-NEXT: s_mov_b32 m0, -1
239 ; CHECK-SDAG-NEXT: ;;#ASMEND
240 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, m0
241 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
242 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
243 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
244 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
245 ; CHECK-SDAG-NEXT: s_endpgm
247 ; CHECK-GISEL-LABEL: test_readfirstlane_m0:
248 ; CHECK-GISEL: ; %bb.0:
249 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
250 ; CHECK-GISEL-NEXT: ;;#ASMSTART
251 ; CHECK-GISEL-NEXT: s_mov_b32 m0, -1
252 ; CHECK-GISEL-NEXT: ;;#ASMEND
253 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, m0
254 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
255 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
256 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
257 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
258 ; CHECK-GISEL-NEXT: s_endpgm
259 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
260 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
261 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel whose i32 readfirstlane operand is produced by inline asm with an
; "=s" (SGPR) output constraint. The result is stored to %out. The expected
; output has no v_readfirstlane_b32; the SGPR (s2) is copied to a VGPR with
; v_mov_b32 before the store.
265 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) #1 {
266 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
267 ; CHECK-SDAG: ; %bb.0:
268 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
269 ; CHECK-SDAG-NEXT: ;;#ASMSTART
270 ; CHECK-SDAG-NEXT: s_mov_b32 s2, 0
271 ; CHECK-SDAG-NEXT: ;;#ASMEND
272 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
273 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
274 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
275 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
276 ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
277 ; CHECK-SDAG-NEXT: s_endpgm
279 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32:
280 ; CHECK-GISEL: ; %bb.0:
281 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
282 ; CHECK-GISEL-NEXT: ;;#ASMSTART
283 ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
284 ; CHECK-GISEL-NEXT: ;;#ASMEND
285 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s2
286 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
287 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
288 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
289 ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
290 ; CHECK-GISEL-NEXT: s_endpgm
291 %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
292 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr)
293 store i32 %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel whose i64 readfirstlane operand is produced by inline asm with an
; "=s" output constraint. The result is stored to %out. The expected output
; has no v_readfirstlane_b32; the SGPR pair s[2:3] is copied to VGPRs with two
; v_mov_b32 instructions before the store.
297 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) #1 {
298 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
299 ; CHECK-SDAG: ; %bb.0:
300 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
301 ; CHECK-SDAG-NEXT: ;;#ASMSTART
302 ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
303 ; CHECK-SDAG-NEXT: ;;#ASMEND
304 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
305 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
306 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
307 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
308 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
309 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
310 ; CHECK-SDAG-NEXT: s_endpgm
312 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
313 ; CHECK-GISEL: ; %bb.0:
314 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
315 ; CHECK-GISEL-NEXT: ;;#ASMSTART
316 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
317 ; CHECK-GISEL-NEXT: ;;#ASMEND
318 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
319 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
320 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
321 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
322 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
323 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
324 ; CHECK-GISEL-NEXT: s_endpgm
325 %sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"()
326 %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr)
327 store i64 %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel whose double readfirstlane operand is produced by inline asm with an
; "=s" output constraint. The result is stored to %out. The expected output
; has no v_readfirstlane_b32; the SGPR pair s[2:3] is copied to VGPRs with two
; v_mov_b32 instructions before the store.
331 define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) #1 {
332 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
333 ; CHECK-SDAG: ; %bb.0:
334 ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
335 ; CHECK-SDAG-NEXT: ;;#ASMSTART
336 ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
337 ; CHECK-SDAG-NEXT: ;;#ASMEND
338 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
339 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
340 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
341 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
342 ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
343 ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
344 ; CHECK-SDAG-NEXT: s_endpgm
346 ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
347 ; CHECK-GISEL: ; %bb.0:
348 ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
349 ; CHECK-GISEL-NEXT: ;;#ASMSTART
350 ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
351 ; CHECK-GISEL-NEXT: ;;#ASMEND
352 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
353 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
354 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
355 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
356 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
357 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
358 ; CHECK-GISEL-NEXT: s_endpgm
359 %sgpr = call double asm "s_mov_b64 $0, 0", "=s"()
360 %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr)
361 store double %readfirstlane, ptr addrspace(1) %out, align 4
; Kernel that applies readfirstlane to the integer value of an addrspace(5)
; alloca's address (via ptrtoint) and feeds the result to inline asm through
; an "s" constraint. The expected output has no v_readfirstlane_b32; the
; operand is materialized as s_mov_b32 s4, 0.
365 define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
366 ; CHECK-SDAG-LABEL: test_readfirstlane_fi:
367 ; CHECK-SDAG: ; %bb.0:
368 ; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s15
369 ; CHECK-SDAG-NEXT: s_addc_u32 s1, s1, 0
370 ; CHECK-SDAG-NEXT: s_mov_b32 s4, 0
371 ; CHECK-SDAG-NEXT: ;;#ASMSTART
372 ; CHECK-SDAG-NEXT: ; use s4
373 ; CHECK-SDAG-NEXT: ;;#ASMEND
374 ; CHECK-SDAG-NEXT: s_endpgm
376 ; CHECK-GISEL-LABEL: test_readfirstlane_fi:
377 ; CHECK-GISEL: ; %bb.0:
378 ; CHECK-GISEL-NEXT: s_add_u32 s0, s0, s15
379 ; CHECK-GISEL-NEXT: s_addc_u32 s1, s1, 0
380 ; CHECK-GISEL-NEXT: s_mov_b32 s4, 0
381 ; CHECK-GISEL-NEXT: ;;#ASMSTART
382 ; CHECK-GISEL-NEXT: ; use s4
383 ; CHECK-GISEL-NEXT: ;;#ASMEND
384 ; CHECK-GISEL-NEXT: s_endpgm
385 %alloca = alloca i32, addrspace(5)
386 %int = ptrtoint ptr addrspace(5) %alloca to i32
387 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
388 call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
; Calls the half readfirstlane overload on %src and feeds the result to inline
; asm through an "s" constraint. Both RUN configurations expect a single
; v_readfirstlane_b32 from v2 into s4. Unlike the tests above, this function
; carries no attribute group, and %out is not referenced in the visible body.
392 define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) {
393 ; CHECK-SDAG-LABEL: test_readfirstlane_half:
394 ; CHECK-SDAG: ; %bb.0:
395 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
397 ; CHECK-SDAG-NEXT: ;;#ASMSTART
398 ; CHECK-SDAG-NEXT: ; use s4
399 ; CHECK-SDAG-NEXT: ;;#ASMEND
400 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
402 ; CHECK-GISEL-LABEL: test_readfirstlane_half:
403 ; CHECK-GISEL: ; %bb.0:
404 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
406 ; CHECK-GISEL-NEXT: ;;#ASMSTART
407 ; CHECK-GISEL-NEXT: ; use s4
408 ; CHECK-GISEL-NEXT: ;;#ASMEND
409 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
410 %x = call half @llvm.amdgcn.readfirstlane.f16(half %src)
411 call void asm sideeffect "; use $0", "s"(half %x)
; Calls the float readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect a
; single v_readfirstlane_b32 from v2 into s4. %out is not referenced in the
; visible body.
415 define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) {
416 ; CHECK-SDAG-LABEL: test_readfirstlane_float:
417 ; CHECK-SDAG: ; %bb.0:
418 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
420 ; CHECK-SDAG-NEXT: ;;#ASMSTART
421 ; CHECK-SDAG-NEXT: ; use s4
422 ; CHECK-SDAG-NEXT: ;;#ASMEND
423 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
425 ; CHECK-GISEL-LABEL: test_readfirstlane_float:
426 ; CHECK-GISEL: ; %bb.0:
427 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
429 ; CHECK-GISEL-NEXT: ;;#ASMSTART
430 ; CHECK-GISEL-NEXT: ; use s4
431 ; CHECK-GISEL-NEXT: ;;#ASMEND
432 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
433 %x = call float @llvm.amdgcn.readfirstlane.f32(float %src)
434 call void asm sideeffect "; use $0", "s"(float %x)
; Calls the bfloat readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect a
; single v_readfirstlane_b32 from v2 into s4. %out is not referenced in the
; visible body.
438 define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) {
439 ; CHECK-SDAG-LABEL: test_readfirstlane_bfloat:
440 ; CHECK-SDAG: ; %bb.0:
441 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
443 ; CHECK-SDAG-NEXT: ;;#ASMSTART
444 ; CHECK-SDAG-NEXT: ; use s4
445 ; CHECK-SDAG-NEXT: ;;#ASMEND
446 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
448 ; CHECK-GISEL-LABEL: test_readfirstlane_bfloat:
449 ; CHECK-GISEL: ; %bb.0:
450 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
452 ; CHECK-GISEL-NEXT: ;;#ASMSTART
453 ; CHECK-GISEL-NEXT: ; use s4
454 ; CHECK-GISEL-NEXT: ;;#ASMEND
455 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
456 %x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src)
457 call void asm sideeffect "; use $0", "s"(bfloat %x)
; Calls the i16 readfirstlane overload on %src and feeds the result to inline
; asm through an "s" constraint. Both RUN configurations expect a single
; v_readfirstlane_b32 from v2 into s4. The run without -global-isel also
; expects an s_and_b32 with 0xffff on the result; the -global-isel run does
; not. %out is not referenced in the visible body.
461 define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
462 ; CHECK-SDAG-LABEL: test_readfirstlane_i16:
463 ; CHECK-SDAG: ; %bb.0:
464 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
466 ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
467 ; CHECK-SDAG-NEXT: ;;#ASMSTART
468 ; CHECK-SDAG-NEXT: ; use s4
469 ; CHECK-SDAG-NEXT: ;;#ASMEND
470 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
472 ; CHECK-GISEL-LABEL: test_readfirstlane_i16:
473 ; CHECK-GISEL: ; %bb.0:
474 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
476 ; CHECK-GISEL-NEXT: ;;#ASMSTART
477 ; CHECK-GISEL-NEXT: ; use s4
478 ; CHECK-GISEL-NEXT: ;;#ASMEND
479 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
480 %x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src)
481 call void asm sideeffect "; use $0", "s"(i16 %x)
; Calls the <2 x half> readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect a
; single v_readfirstlane_b32 (v2 into s4) for the whole vector. %out is not
; referenced in the visible body.
485 define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) {
486 ; CHECK-SDAG-LABEL: test_readfirstlane_v2f16:
487 ; CHECK-SDAG: ; %bb.0:
488 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
490 ; CHECK-SDAG-NEXT: ;;#ASMSTART
491 ; CHECK-SDAG-NEXT: ; use s4
492 ; CHECK-SDAG-NEXT: ;;#ASMEND
493 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
495 ; CHECK-GISEL-LABEL: test_readfirstlane_v2f16:
496 ; CHECK-GISEL: ; %bb.0:
497 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
499 ; CHECK-GISEL-NEXT: ;;#ASMSTART
500 ; CHECK-GISEL-NEXT: ; use s4
501 ; CHECK-GISEL-NEXT: ;;#ASMEND
502 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
503 %x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
504 call void asm sideeffect "; use $0", "s"(<2 x half> %x)
; Calls the <2 x float> readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect two
; v_readfirstlane_b32 instructions (v2 -> s4, v3 -> s5) producing s[4:5]; they
; differ only in emission order. %out is not referenced in the visible body.
508 define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) {
509 ; CHECK-SDAG-LABEL: test_readfirstlane_v2f32:
510 ; CHECK-SDAG: ; %bb.0:
511 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
513 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
514 ; CHECK-SDAG-NEXT: ;;#ASMSTART
515 ; CHECK-SDAG-NEXT: ; use s[4:5]
516 ; CHECK-SDAG-NEXT: ;;#ASMEND
517 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
519 ; CHECK-GISEL-LABEL: test_readfirstlane_v2f32:
520 ; CHECK-GISEL: ; %bb.0:
521 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
522 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
523 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
524 ; CHECK-GISEL-NEXT: ;;#ASMSTART
525 ; CHECK-GISEL-NEXT: ; use s[4:5]
526 ; CHECK-GISEL-NEXT: ;;#ASMEND
527 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
528 %x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src)
529 call void asm sideeffect "; use $0", "s"(<2 x float> %x)
; Calls the <7 x i32> readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect seven
; v_readfirstlane_b32 instructions (v2..v8 -> s4..s10) producing s[4:10]; they
; differ only in emission order. %out is not referenced in the visible body.
533 define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) {
534 ; CHECK-SDAG-LABEL: test_readfirstlane_v7i32:
535 ; CHECK-SDAG: ; %bb.0:
536 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8
538 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7
539 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6
540 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
541 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
542 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
543 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
544 ; CHECK-SDAG-NEXT: ;;#ASMSTART
545 ; CHECK-SDAG-NEXT: ; use s[4:10]
546 ; CHECK-SDAG-NEXT: ;;#ASMEND
547 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
549 ; CHECK-GISEL-LABEL: test_readfirstlane_v7i32:
550 ; CHECK-GISEL: ; %bb.0:
551 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
553 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
554 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
555 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
556 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
557 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
558 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
559 ; CHECK-GISEL-NEXT: ;;#ASMSTART
560 ; CHECK-GISEL-NEXT: ; use s[4:10]
561 ; CHECK-GISEL-NEXT: ;;#ASMEND
562 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
563 %x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src)
564 call void asm sideeffect "; use $0", "s"(<7 x i32> %x)
; Calls the <8 x i16> readfirstlane overload on %src and feeds the result to
; inline asm through an "s" constraint. Both RUN configurations expect four
; v_readfirstlane_b32 instructions (v2..v5 -> s4..s7) producing s[4:7]; they
; differ only in emission order. %out is not referenced in the visible body.
568 define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
569 ; CHECK-SDAG-LABEL: test_readfirstlane_v8i16:
570 ; CHECK-SDAG: ; %bb.0:
571 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
573 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
574 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
575 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
576 ; CHECK-SDAG-NEXT: ;;#ASMSTART
577 ; CHECK-SDAG-NEXT: ; use s[4:7]
578 ; CHECK-SDAG-NEXT: ;;#ASMEND
579 ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
581 ; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
582 ; CHECK-GISEL: ; %bb.0:
583 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
585 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
586 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
587 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
588 ; CHECK-GISEL-NEXT: ;;#ASMSTART
589 ; CHECK-GISEL-NEXT: ; use s[4:7]
590 ; CHECK-GISEL-NEXT: ;;#ASMEND
591 ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
592 %x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src)
593 call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
; Attribute groups referenced in this file: #0 is attached to the explicit
; intrinsic declarations, #1 to the test functions that name it.
; NOTE(review): "readnone" is the legacy spelling; newer LLVM IR presumably
; expresses this as memory(none) -- confirm against the targeted LLVM version.
597 attributes #0 = { nounwind readnone convergent }
598 attributes #1 = { nounwind }