1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; All three operands (%data, %lane, %vdst.in) are passed inreg.
; The checks expect %vdst.in to be copied from s4 into v0 first. The GFX7 and
; GFX8 checks then expect the lane to be moved into m0, while the GFX10 checks
; expect the lane to be read directly from s3.
7 define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) #0 {
8 ; GFX7-LABEL: test_writelane_s_s_s:
10 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
11 ; GFX7-NEXT: s_mov_b32 m0, s3
12 ; GFX7-NEXT: v_writelane_b32 v0, s2, m0
13 ; GFX7-NEXT: ; return to shader part epilog
15 ; GFX8-LABEL: test_writelane_s_s_s:
17 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
18 ; GFX8-NEXT: s_mov_b32 m0, s3
19 ; GFX8-NEXT: v_writelane_b32 v0, s2, m0
20 ; GFX8-NEXT: ; return to shader part epilog
22 ; GFX10-LABEL: test_writelane_s_s_s:
24 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
25 ; GFX10-NEXT: v_writelane_b32 v0, s2, s3
26 ; GFX10-NEXT: ; return to shader part epilog
27 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
28 %writelane.cast = bitcast i32 %writelane to float
29 ret float %writelane.cast
; %data and %lane are passed inreg; the third operand is the constant 42.
; Every check prefix expects 42 to be materialized into v0 with v_mov_b32
; before the v_writelane_b32. The GFX7 and GFX8 checks route the lane through
; m0, while the GFX10 checks read it directly from s3.
32 define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) #0 {
33 ; GFX7-LABEL: test_writelane_s_s_imm:
35 ; GFX7-NEXT: v_mov_b32_e32 v0, 42
36 ; GFX7-NEXT: s_mov_b32 m0, s3
37 ; GFX7-NEXT: v_writelane_b32 v0, s2, m0
38 ; GFX7-NEXT: ; return to shader part epilog
40 ; GFX8-LABEL: test_writelane_s_s_imm:
42 ; GFX8-NEXT: v_mov_b32_e32 v0, 42
43 ; GFX8-NEXT: s_mov_b32 m0, s3
44 ; GFX8-NEXT: v_writelane_b32 v0, s2, m0
45 ; GFX8-NEXT: ; return to shader part epilog
47 ; GFX10-LABEL: test_writelane_s_s_imm:
49 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
50 ; GFX10-NEXT: v_writelane_b32 v0, s2, s3
51 ; GFX10-NEXT: ; return to shader part epilog
52 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 42)
53 %writelane.cast = bitcast i32 %writelane to float
54 ret float %writelane.cast
57 ; Data is not an inline immediate.
; The data operand is the constant 999 (0x3e7). Every check prefix expects it
; to be materialized into s0 with s_movk_i32 before the v_writelane_b32.
58 define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
59 ; GFX7-LABEL: test_writelane_k_s_v:
61 ; GFX7-NEXT: s_movk_i32 s0, 0x3e7
62 ; GFX7-NEXT: s_mov_b32 m0, s2
63 ; GFX7-NEXT: v_writelane_b32 v0, s0, m0
64 ; GFX7-NEXT: ; return to shader part epilog
66 ; GFX8-LABEL: test_writelane_k_s_v:
68 ; GFX8-NEXT: s_movk_i32 s0, 0x3e7
69 ; GFX8-NEXT: s_mov_b32 m0, s2
70 ; GFX8-NEXT: v_writelane_b32 v0, s0, m0
71 ; GFX8-NEXT: ; return to shader part epilog
73 ; GFX10-LABEL: test_writelane_k_s_v:
75 ; GFX10-NEXT: s_movk_i32 s0, 0x3e7
76 ; GFX10-NEXT: v_writelane_b32 v0, s0, s2
77 ; GFX10-NEXT: ; return to shader part epilog
78 %writelane = call i32 @llvm.amdgcn.writelane(i32 999, i32 %lane, i32 %vdst.in)
79 %writelane.cast = bitcast i32 %writelane to float
80 ret float %writelane.cast
; The data operand is the constant 42 and the lane is passed inreg.
; Every check prefix expects 42 to appear directly as the v_writelane_b32
; source operand and the lane to be read directly from s2.
84 define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
85 ; GFX7-LABEL: test_writelane_imm_s_v:
87 ; GFX7-NEXT: v_writelane_b32 v0, 42, s2
88 ; GFX7-NEXT: ; return to shader part epilog
90 ; GFX8-LABEL: test_writelane_imm_s_v:
92 ; GFX8-NEXT: v_writelane_b32 v0, 42, s2
93 ; GFX8-NEXT: ; return to shader part epilog
95 ; GFX10-LABEL: test_writelane_imm_s_v:
97 ; GFX10-NEXT: v_writelane_b32 v0, 42, s2
98 ; GFX10-NEXT: ; return to shader part epilog
99 %writelane = call i32 @llvm.amdgcn.writelane(i32 42, i32 %lane, i32 %vdst.in)
100 %writelane.cast = bitcast i32 %writelane to float
101 ret float %writelane.cast
104 ; Data is a subtarget-dependent inline immediate.
; The data operand is the bit pattern of float 0x3FC45F3060000000. The GFX7
; checks expect it to be loaded into s0 as the literal 0x3e22f983 (with the
; lane routed through m0), while the GFX8 and GFX10 checks expect it to be
; encoded directly in v_writelane_b32 as 0.15915494.
105 define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
106 ; GFX7-LABEL: test_writelane_imminv2pi_s_v:
108 ; GFX7-NEXT: s_mov_b32 s0, 0x3e22f983
109 ; GFX7-NEXT: s_mov_b32 m0, s2
110 ; GFX7-NEXT: v_writelane_b32 v0, s0, m0
111 ; GFX7-NEXT: ; return to shader part epilog
113 ; GFX8-LABEL: test_writelane_imminv2pi_s_v:
115 ; GFX8-NEXT: v_writelane_b32 v0, 0.15915494, s2
116 ; GFX8-NEXT: ; return to shader part epilog
118 ; GFX10-LABEL: test_writelane_imminv2pi_s_v:
120 ; GFX10-NEXT: v_writelane_b32 v0, 0.15915494, s2
121 ; GFX10-NEXT: ; return to shader part epilog
122 %writelane = call i32 @llvm.amdgcn.writelane(i32 bitcast (float 0x3FC45F3060000000 to i32), i32 %lane, i32 %vdst.in)
123 %writelane.cast = bitcast i32 %writelane to float
124 ret float %writelane.cast
; The lane operand is the constant 23 and the data is passed inreg.
; Every check prefix expects 23 to appear directly as the lane operand of
; v_writelane_b32, with no copy through m0 or another SGPR.
129 define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0 {
130 ; GFX7-LABEL: test_writelane_s_imm_v:
132 ; GFX7-NEXT: v_writelane_b32 v0, s2, 23
133 ; GFX7-NEXT: ; return to shader part epilog
135 ; GFX8-LABEL: test_writelane_s_imm_v:
137 ; GFX8-NEXT: v_writelane_b32 v0, s2, 23
138 ; GFX8-NEXT: ; return to shader part epilog
140 ; GFX10-LABEL: test_writelane_s_imm_v:
142 ; GFX10-NEXT: v_writelane_b32 v0, s2, 23
143 ; GFX10-NEXT: ; return to shader part epilog
144 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 23, i32 %vdst.in)
145 %writelane.cast = bitcast i32 %writelane to float
146 ret float %writelane.cast
149 ; Lane index is larger than the wavesize.
; The lane operand is the constant 67. The GFX7 and GFX8 checks expect it to
; appear as the immediate 3 (67 modulo 64), while the GFX10 checks expect 67
; (0x43) to be moved into s0 with s_movk_i32 and used from there.
150 define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0 {
151 ; GFX7-LABEL: test_writelane_s_k0_v:
153 ; GFX7-NEXT: v_writelane_b32 v0, s2, 3
154 ; GFX7-NEXT: ; return to shader part epilog
156 ; GFX8-LABEL: test_writelane_s_k0_v:
158 ; GFX8-NEXT: v_writelane_b32 v0, s2, 3
159 ; GFX8-NEXT: ; return to shader part epilog
161 ; GFX10-LABEL: test_writelane_s_k0_v:
163 ; GFX10-NEXT: s_movk_i32 s0, 0x43
164 ; GFX10-NEXT: v_writelane_b32 v0, s2, s0
165 ; GFX10-NEXT: ; return to shader part epilog
166 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 67, i32 %vdst.in)
167 %writelane.cast = bitcast i32 %writelane to float
168 ret float %writelane.cast
171 ; Lane index is out of range for wave32 (32 is one past lane 31).
; The lane operand is the constant 32. Every check prefix expects it to stay
; the immediate 32 in the v_writelane_b32 lane operand.
172 define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0 {
173 ; GFX7-LABEL: test_writelane_s_k1_v:
175 ; GFX7-NEXT: v_writelane_b32 v0, s2, 32
176 ; GFX7-NEXT: ; return to shader part epilog
178 ; GFX8-LABEL: test_writelane_s_k1_v:
180 ; GFX8-NEXT: v_writelane_b32 v0, s2, 32
181 ; GFX8-NEXT: ; return to shader part epilog
183 ; GFX10-LABEL: test_writelane_s_k1_v:
185 ; GFX10-NEXT: v_writelane_b32 v0, s2, 32
186 ; GFX10-NEXT: ; return to shader part epilog
187 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 32, i32 %vdst.in)
188 %writelane.cast = bitcast i32 %writelane to float
189 ret float %writelane.cast
; None of the operands is marked inreg, so data, lane and vdst.in arrive in
; v0, v1 and v2. Every check prefix expects both data and lane to be read
; into SGPRs with v_readfirstlane_b32, and the result to be copied from v2
; back to v0. The GFX7 and GFX8 checks additionally move the lane into m0.
192 define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) #0 {
193 ; GFX7-LABEL: test_writelane_v_v_v:
195 ; GFX7-NEXT: v_readfirstlane_b32 s1, v1
196 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
197 ; GFX7-NEXT: s_mov_b32 m0, s1
198 ; GFX7-NEXT: v_writelane_b32 v2, s0, m0
199 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
200 ; GFX7-NEXT: ; return to shader part epilog
202 ; GFX8-LABEL: test_writelane_v_v_v:
204 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
205 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
206 ; GFX8-NEXT: s_mov_b32 m0, s1
207 ; GFX8-NEXT: v_writelane_b32 v2, s0, m0
208 ; GFX8-NEXT: v_mov_b32_e32 v0, v2
209 ; GFX8-NEXT: ; return to shader part epilog
211 ; GFX10-LABEL: test_writelane_v_v_v:
213 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
214 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
215 ; GFX10-NEXT: v_writelane_b32 v2, s0, s1
216 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
217 ; GFX10-NEXT: ; return to shader part epilog
218 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
219 %writelane.cast = bitcast i32 %writelane to float
220 ret float %writelane.cast
; Data arrives in a VGPR (v0) while the lane is passed inreg (s2).
; Every check prefix expects only the data to be read into an SGPR with
; v_readfirstlane_b32, and the result to be copied from v1 back to v0. The
; GFX7 and GFX8 checks additionally move the lane into m0.
; The inreg attribute belongs on the function parameter only; the intrinsic
; call takes plain operands, matching every other call in this file.
223 define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) #0 {
224 ; GFX7-LABEL: test_writelane_v_s_v:
226 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
227 ; GFX7-NEXT: s_mov_b32 m0, s2
228 ; GFX7-NEXT: v_writelane_b32 v1, s0, m0
229 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
230 ; GFX7-NEXT: ; return to shader part epilog
232 ; GFX8-LABEL: test_writelane_v_s_v:
234 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
235 ; GFX8-NEXT: s_mov_b32 m0, s2
236 ; GFX8-NEXT: v_writelane_b32 v1, s0, m0
237 ; GFX8-NEXT: v_mov_b32_e32 v0, v1
238 ; GFX8-NEXT: ; return to shader part epilog
240 ; GFX10-LABEL: test_writelane_v_s_v:
242 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
243 ; GFX10-NEXT: v_writelane_b32 v1, s0, s2
244 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
245 ; GFX10-NEXT: ; return to shader part epilog
246 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
247 %writelane.cast = bitcast i32 %writelane to float
248 ret float %writelane.cast
251 ; FIXME: This could theoretically use m0 directly as the data source,
252 ; and another SGPR as the lane selector and avoid register swap.
; The data operand is the output of an inline asm constrained to m0, and the
; lane is passed inreg. The GFX7 and GFX8 checks expect m0 to be copied to s0
; before the lane is moved into m0; the GFX10 checks expect m0 to be used
; directly as the data operand with the lane read from s2.
253 define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
254 ; GFX7-LABEL: test_writelane_m0_s_v:
256 ; GFX7-NEXT: ;;#ASMSTART
257 ; GFX7-NEXT: s_mov_b32 m0, -1
258 ; GFX7-NEXT: ;;#ASMEND
259 ; GFX7-NEXT: s_mov_b32 s0, m0
260 ; GFX7-NEXT: s_mov_b32 m0, s2
261 ; GFX7-NEXT: v_writelane_b32 v0, s0, m0
262 ; GFX7-NEXT: ; return to shader part epilog
264 ; GFX8-LABEL: test_writelane_m0_s_v:
266 ; GFX8-NEXT: ;;#ASMSTART
267 ; GFX8-NEXT: s_mov_b32 m0, -1
268 ; GFX8-NEXT: ;;#ASMEND
269 ; GFX8-NEXT: s_mov_b32 s0, m0
270 ; GFX8-NEXT: s_mov_b32 m0, s2
271 ; GFX8-NEXT: v_writelane_b32 v0, s0, m0
272 ; GFX8-NEXT: ; return to shader part epilog
274 ; GFX10-LABEL: test_writelane_m0_s_v:
276 ; GFX10-NEXT: ;;#ASMSTART
277 ; GFX10-NEXT: s_mov_b32 m0, -1
278 ; GFX10-NEXT: ;;#ASMEND
279 ; GFX10-NEXT: v_writelane_b32 v0, m0, s2
280 ; GFX10-NEXT: ; return to shader part epilog
281 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
282 %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %lane, i32 %vdst.in)
283 %writelane.cast = bitcast i32 %writelane to float
284 ret float %writelane.cast
; The lane operand is the output of an inline asm constrained to m0, and the
; data is passed inreg. Every check prefix expects m0 to be used directly as
; the lane operand of v_writelane_b32, with no extra copy after the asm.
287 define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0 {
288 ; GFX7-LABEL: test_writelane_s_m0_v:
290 ; GFX7-NEXT: ;;#ASMSTART
291 ; GFX7-NEXT: s_mov_b32 m0, -1
292 ; GFX7-NEXT: ;;#ASMEND
293 ; GFX7-NEXT: v_writelane_b32 v0, s2, m0
294 ; GFX7-NEXT: ; return to shader part epilog
296 ; GFX8-LABEL: test_writelane_s_m0_v:
298 ; GFX8-NEXT: ;;#ASMSTART
299 ; GFX8-NEXT: s_mov_b32 m0, -1
300 ; GFX8-NEXT: ;;#ASMEND
301 ; GFX8-NEXT: v_writelane_b32 v0, s2, m0
302 ; GFX8-NEXT: ; return to shader part epilog
304 ; GFX10-LABEL: test_writelane_s_m0_v:
306 ; GFX10-NEXT: ;;#ASMSTART
307 ; GFX10-NEXT: s_mov_b32 m0, -1
308 ; GFX10-NEXT: ;;#ASMEND
309 ; GFX10-NEXT: v_writelane_b32 v0, s2, m0
310 ; GFX10-NEXT: ; return to shader part epilog
311 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
312 %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %m0, i32 %vdst.in)
313 %writelane.cast = bitcast i32 %writelane to float
314 ret float %writelane.cast
; Intrinsic exercised by the test functions in this file; the callers pass
; their operands in the order (data, lane, vdst.in).
317 declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
; NOTE(review): no function visible in this file calls workitem.id.x, so this
; declaration and attribute group #2 may be leftovers -- confirm before removing.
318 declare i32 @llvm.amdgcn.workitem.id.x() #2
; #0 is attached to each test function definition.
320 attributes #0 = { nounwind }
; #1 is attached to the writelane declaration.
321 attributes #1 = { convergent nounwind readnone willreturn }
; #2 is attached to the workitem.id.x declaration.
322 attributes #2 = { nounwind readnone speculatable willreturn }