1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s
6 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX802-GISEL %s
7 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX1010-GISEL %s
8 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 -global-isel < %s | FileCheck -check-prefixes=GFX1100-GISEL %s
; Declarations of the writelane intrinsic variants (i32, i64 and double results).
; NOTE(review): the i32 declaration uses the unsuffixed name, while the visible
; callers reference @llvm.amdgcn.writelane.i32 - presumably the IR reader
; reconciles the two spellings; confirm before changing either one.
10 declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
11 declare i64 @llvm.amdgcn.writelane.i64(i64, i32, i64) #0
12 declare double @llvm.amdgcn.writelane.f64(double, i32, double) #0
; i32 writelane where both the first operand (%src0) and the second operand
; (%src1) are kernel arguments. The third operand is the i32 currently stored
; at %out, and the intrinsic result is stored back to %out.
; The assertions expect a single v_writelane_b32; on gfx802 the second operand
; is first copied into m0, on gfx1010/gfx1100 it is used as an SGPR directly.
14 define amdgpu_kernel void @test_writelane_sreg_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
15 ; GFX802-SDAG-LABEL: test_writelane_sreg_i32:
16 ; GFX802-SDAG: ; %bb.0:
17 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
18 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
19 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s3
20 ; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
21 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
22 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
23 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
24 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
25 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0
26 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
27 ; GFX802-SDAG-NEXT: s_endpgm
29 ; GFX1010-SDAG-LABEL: test_writelane_sreg_i32:
30 ; GFX1010-SDAG: ; %bb.0:
31 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
32 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
33 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
34 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[0:1], 0x0
35 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
36 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
37 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3
38 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
39 ; GFX1010-SDAG-NEXT: s_endpgm
41 ; GFX1100-SDAG-LABEL: test_writelane_sreg_i32:
42 ; GFX1100-SDAG: ; %bb.0:
43 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
44 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
45 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
46 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[0:1], 0x0
47 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
48 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
49 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3
50 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
51 ; GFX1100-SDAG-NEXT: s_endpgm
53 ; GFX802-GISEL-LABEL: test_writelane_sreg_i32:
54 ; GFX802-GISEL: ; %bb.0:
55 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
56 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
57 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s3
58 ; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
59 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
60 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
61 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
62 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
63 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0
64 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
65 ; GFX802-GISEL-NEXT: s_endpgm
67 ; GFX1010-GISEL-LABEL: test_writelane_sreg_i32:
68 ; GFX1010-GISEL: ; %bb.0:
69 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
70 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
71 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
72 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[0:1], 0x0
73 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
74 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
75 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3
76 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
77 ; GFX1010-GISEL-NEXT: s_endpgm
79 ; GFX1100-GISEL-LABEL: test_writelane_sreg_i32:
80 ; GFX1100-GISEL: ; %bb.0:
81 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
82 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
83 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
84 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[0:1], 0x0
85 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
86 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
87 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3
88 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
89 ; GFX1100-GISEL-NEXT: s_endpgm
90 %oldval = load i32, ptr addrspace(1) %out
91 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
92 store i32 %writelane, ptr addrspace(1) %out, align 4
; i64 writelane with the first operand (%src0, i64) and the second operand
; (%src1, i32) taken from kernel arguments. The third operand is the i64
; loaded from %out, and the result is stored back to %out.
; The assertions expect two v_writelane_b32 instructions, one for each
; register of the 64-bit pair, followed by a single 64-bit store.
96 define amdgpu_kernel void @test_writelane_sreg_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
97 ; GFX802-SDAG-LABEL: test_writelane_sreg_i64:
98 ; GFX802-SDAG: ; %bb.0:
99 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
100 ; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x10
101 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
102 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
103 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
104 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
105 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
106 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
107 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
108 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
109 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
110 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
111 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
112 ; GFX802-SDAG-NEXT: s_endpgm
114 ; GFX1010-SDAG-LABEL: test_writelane_sreg_i64:
115 ; GFX1010-SDAG: ; %bb.0:
116 ; GFX1010-SDAG-NEXT: s_clause 0x1
117 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
118 ; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x10
119 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
120 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
122 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
123 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
124 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
125 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s6
126 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s6
127 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
128 ; GFX1010-SDAG-NEXT: s_endpgm
130 ; GFX1100-SDAG-LABEL: test_writelane_sreg_i64:
131 ; GFX1100-SDAG: ; %bb.0:
132 ; GFX1100-SDAG-NEXT: s_clause 0x1
133 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
134 ; GFX1100-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x10
135 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
136 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
137 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
138 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
139 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
140 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
141 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s6
142 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s6
143 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
144 ; GFX1100-SDAG-NEXT: s_endpgm
146 ; GFX802-GISEL-LABEL: test_writelane_sreg_i64:
147 ; GFX802-GISEL: ; %bb.0:
148 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
149 ; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x10
150 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
151 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
152 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
153 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
154 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
155 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
156 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
157 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
158 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
159 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
160 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
161 ; GFX802-GISEL-NEXT: s_endpgm
163 ; GFX1010-GISEL-LABEL: test_writelane_sreg_i64:
164 ; GFX1010-GISEL: ; %bb.0:
165 ; GFX1010-GISEL-NEXT: s_clause 0x1
166 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
167 ; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x10
168 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
169 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
171 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
172 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
173 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
174 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s6
175 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s6
176 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
177 ; GFX1010-GISEL-NEXT: s_endpgm
179 ; GFX1100-GISEL-LABEL: test_writelane_sreg_i64:
180 ; GFX1100-GISEL: ; %bb.0:
181 ; GFX1100-GISEL-NEXT: s_clause 0x1
182 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
183 ; GFX1100-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x10
184 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
185 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
186 ; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
187 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
188 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
189 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
190 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s6
191 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s6
192 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
193 ; GFX1100-GISEL-NEXT: s_endpgm
194 %oldval = load i64, ptr addrspace(1) %out
195 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
196 store i64 %writelane, ptr addrspace(1) %out, align 4
; double writelane with the first operand (%src0, double) and the second
; operand (%src1, i32) taken from kernel arguments. The third operand is the
; double loaded from %out, and the result is stored back to %out.
; The expected instruction sequences are the same as for the i64 variant:
; two v_writelane_b32 instructions and one 64-bit store.
200 define amdgpu_kernel void @test_writelane_sreg_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
201 ; GFX802-SDAG-LABEL: test_writelane_sreg_f64:
202 ; GFX802-SDAG: ; %bb.0:
203 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
204 ; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x10
205 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
206 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
207 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
208 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
209 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
210 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
211 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
212 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
213 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
214 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
215 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
216 ; GFX802-SDAG-NEXT: s_endpgm
218 ; GFX1010-SDAG-LABEL: test_writelane_sreg_f64:
219 ; GFX1010-SDAG: ; %bb.0:
220 ; GFX1010-SDAG-NEXT: s_clause 0x1
221 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
222 ; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x10
223 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
224 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
225 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
226 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
227 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
228 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
229 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s6
230 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s6
231 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
232 ; GFX1010-SDAG-NEXT: s_endpgm
234 ; GFX1100-SDAG-LABEL: test_writelane_sreg_f64:
235 ; GFX1100-SDAG: ; %bb.0:
236 ; GFX1100-SDAG-NEXT: s_clause 0x1
237 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
238 ; GFX1100-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x10
239 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
240 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
241 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
242 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
243 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
244 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
245 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s6
246 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s6
247 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
248 ; GFX1100-SDAG-NEXT: s_endpgm
250 ; GFX802-GISEL-LABEL: test_writelane_sreg_f64:
251 ; GFX802-GISEL: ; %bb.0:
252 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
253 ; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x10
254 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
255 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
256 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
257 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
258 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
259 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
260 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
261 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
262 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
263 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
264 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
265 ; GFX802-GISEL-NEXT: s_endpgm
267 ; GFX1010-GISEL-LABEL: test_writelane_sreg_f64:
268 ; GFX1010-GISEL: ; %bb.0:
269 ; GFX1010-GISEL-NEXT: s_clause 0x1
270 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
271 ; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x10
272 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
273 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
274 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
275 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
276 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
277 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
278 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s6
279 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s6
280 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
281 ; GFX1010-GISEL-NEXT: s_endpgm
283 ; GFX1100-GISEL-LABEL: test_writelane_sreg_f64:
284 ; GFX1100-GISEL: ; %bb.0:
285 ; GFX1100-GISEL-NEXT: s_clause 0x1
286 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
287 ; GFX1100-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x10
288 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
289 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
290 ; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
291 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
292 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
293 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
294 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s6
295 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s6
296 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
297 ; GFX1100-GISEL-NEXT: s_endpgm
298 %oldval = load double, ptr addrspace(1) %out
299 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
300 store double %writelane, ptr addrspace(1) %out, align 4
; i32 writelane whose first operand is the constant 32 and whose second
; operand (%src1) is a kernel argument. The third operand is the i32 loaded
; from %out, and the result is stored back to %out.
; The assertions expect the constant to appear as an immediate operand of
; v_writelane_b32 on every tested target.
304 define amdgpu_kernel void @test_writelane_imm_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
305 ; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i32:
306 ; GFX802-SDAG: ; %bb.0:
307 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
308 ; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
309 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
310 ; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
311 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
312 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
313 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
314 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
315 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, 32, s2
316 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
317 ; GFX802-SDAG-NEXT: s_endpgm
319 ; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i32:
320 ; GFX1010-SDAG: ; %bb.0:
321 ; GFX1010-SDAG-NEXT: s_clause 0x1
322 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
323 ; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
324 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
325 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
326 ; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
327 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
328 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3
329 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 32, s2
330 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
331 ; GFX1010-SDAG-NEXT: s_endpgm
333 ; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i32:
334 ; GFX1100-SDAG: ; %bb.0:
335 ; GFX1100-SDAG-NEXT: s_clause 0x1
336 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
337 ; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8
338 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
339 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
340 ; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0
341 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
342 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3
343 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 32, s2
344 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
345 ; GFX1100-SDAG-NEXT: s_endpgm
347 ; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i32:
348 ; GFX802-GISEL: ; %bb.0:
349 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
350 ; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
351 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
352 ; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
353 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
354 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
355 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
356 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
357 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, 32, s2
358 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
359 ; GFX802-GISEL-NEXT: s_endpgm
361 ; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i32:
362 ; GFX1010-GISEL: ; %bb.0:
363 ; GFX1010-GISEL-NEXT: s_clause 0x1
364 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
365 ; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
366 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
367 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
368 ; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
369 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
370 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3
371 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, 32, s2
372 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
373 ; GFX1010-GISEL-NEXT: s_endpgm
375 ; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i32:
376 ; GFX1100-GISEL: ; %bb.0:
377 ; GFX1100-GISEL-NEXT: s_clause 0x1
378 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
379 ; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8
380 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
381 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
382 ; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0
383 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
384 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3
385 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, 32, s2
386 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
387 ; GFX1100-GISEL-NEXT: s_endpgm
388 %oldval = load i32, ptr addrspace(1) %out
389 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 32, i32 %src1, i32 %oldval)
390 store i32 %writelane, ptr addrspace(1) %out, align 4
; i64 writelane whose first operand is the constant 32 and whose second
; operand (%src1) is a kernel argument. The third operand is the i64 loaded
; from %out, and the result is stored back to %out.
; The assertions expect two v_writelane_b32 instructions with immediate
; operands: 32 for one register of the pair and 0 for the other.
394 define amdgpu_kernel void @test_writelane_imm_sreg_i64(ptr addrspace(1) %out, i32 %src1) #1 {
395 ; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i64:
396 ; GFX802-SDAG: ; %bb.0:
397 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
398 ; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
399 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
400 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
401 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
402 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
403 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
405 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
406 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, 0, s4
407 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, 32, s4
408 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
409 ; GFX802-SDAG-NEXT: s_endpgm
411 ; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i64:
412 ; GFX1010-SDAG: ; %bb.0:
413 ; GFX1010-SDAG-NEXT: s_clause 0x1
414 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
415 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
416 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
417 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
418 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
419 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
420 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
421 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
422 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 0, s4
423 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 32, s4
424 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
425 ; GFX1010-SDAG-NEXT: s_endpgm
427 ; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i64:
428 ; GFX1100-SDAG: ; %bb.0:
429 ; GFX1100-SDAG-NEXT: s_clause 0x1
430 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
431 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x8
432 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
433 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
434 ; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
435 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
436 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
437 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
438 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 0, s4
439 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 32, s4
440 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
441 ; GFX1100-SDAG-NEXT: s_endpgm
443 ; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i64:
444 ; GFX802-GISEL: ; %bb.0:
445 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
446 ; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
447 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
448 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
449 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
450 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
451 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
452 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
453 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
454 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, 32, s4
455 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, 0, s4
456 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
457 ; GFX802-GISEL-NEXT: s_endpgm
459 ; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i64:
460 ; GFX1010-GISEL: ; %bb.0:
461 ; GFX1010-GISEL-NEXT: s_clause 0x1
462 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
463 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
464 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
465 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
466 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
467 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
468 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s2
469 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s3
470 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, 32, s4
471 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 0, s4
472 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
473 ; GFX1010-GISEL-NEXT: s_endpgm
475 ; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i64:
476 ; GFX1100-GISEL: ; %bb.0:
477 ; GFX1100-GISEL-NEXT: s_clause 0x1
478 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
479 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x8
480 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
481 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
482 ; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
483 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
484 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s2
485 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s3
486 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, 32, s4
487 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 0, s4
488 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
489 ; GFX1100-GISEL-NEXT: s_endpgm
490 %oldval = load i64, ptr addrspace(1) %out
491 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 32, i32 %src1, i64 %oldval)
492 store i64 %writelane, ptr addrspace(1) %out, align 4
; double writelane whose first operand is the constant 32.0 and whose second
; operand (%src1) is a kernel argument. The third operand is the double loaded
; from %out, and the result is stored back to %out.
; The assertions expect 0x40400000 to be materialized in an SGPR with
; s_mov_b32 for one v_writelane_b32, while the other v_writelane_b32 uses an
; immediate 0. On gfx802 the SGPR-sourced writelane takes its last operand
; from m0.
496 define amdgpu_kernel void @test_writelane_imm_sreg_f64(ptr addrspace(1) %out, i32 %src1) #1 {
497 ; GFX802-SDAG-LABEL: test_writelane_imm_sreg_f64:
498 ; GFX802-SDAG: ; %bb.0:
499 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
500 ; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
501 ; GFX802-SDAG-NEXT: s_mov_b32 s5, 0x40400000
502 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
503 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
504 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
505 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
506 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
507 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
508 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
509 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
510 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s5, m0
511 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, 0, s4
512 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
513 ; GFX802-SDAG-NEXT: s_endpgm
515 ; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_f64:
516 ; GFX1010-SDAG: ; %bb.0:
517 ; GFX1010-SDAG-NEXT: s_clause 0x1
518 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
519 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
520 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
521 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
522 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
523 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
524 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
525 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
526 ; GFX1010-SDAG-NEXT: s_mov_b32 s2, 0x40400000
527 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s2, s4
528 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 0, s4
529 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
530 ; GFX1010-SDAG-NEXT: s_endpgm
532 ; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_f64:
533 ; GFX1100-SDAG: ; %bb.0:
534 ; GFX1100-SDAG-NEXT: s_clause 0x1
535 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
536 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x8
537 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
538 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
539 ; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
540 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
541 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
542 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
543 ; GFX1100-SDAG-NEXT: s_mov_b32 s2, 0x40400000
544 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
545 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s2, s4
546 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 0, s4
547 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
548 ; GFX1100-SDAG-NEXT: s_endpgm
550 ; GFX802-GISEL-LABEL: test_writelane_imm_sreg_f64:
551 ; GFX802-GISEL: ; %bb.0:
552 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
553 ; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
554 ; GFX802-GISEL-NEXT: s_mov_b32 s5, 0x40400000
555 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
556 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
557 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
558 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
559 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
560 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
561 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
562 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
563 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, 0, s4
564 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
565 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
566 ; GFX802-GISEL-NEXT: s_endpgm
568 ; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_f64:
569 ; GFX1010-GISEL: ; %bb.0:
570 ; GFX1010-GISEL-NEXT: s_clause 0x1
571 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
572 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
573 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
574 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
575 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
576 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
577 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s2
578 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s3
579 ; GFX1010-GISEL-NEXT: s_mov_b32 s2, 0x40400000
580 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, 0, s4
581 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s2, s4
582 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
583 ; GFX1010-GISEL-NEXT: s_endpgm
585 ; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_f64:
586 ; GFX1100-GISEL: ; %bb.0:
587 ; GFX1100-GISEL-NEXT: s_clause 0x1
588 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
589 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x8
590 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
591 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
592 ; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
593 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
594 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s2
595 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s3
596 ; GFX1100-GISEL-NEXT: s_mov_b32 s2, 0x40400000
597 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, 0, s4
598 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s2, s4
599 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
600 ; GFX1100-GISEL-NEXT: s_endpgm
601 %oldval = load double, ptr addrspace(1) %out
602 %writelane = call double @llvm.amdgcn.writelane.f64(double 32.0, i32 %src1, double %oldval)
603 store double %writelane, ptr addrspace(1) %out, align 4
; i32 writelane whose second operand (%lane) is element 1 of a <2 x i32>
; loaded from %in at an index given by the workitem id, so it is computed per
; work-item rather than passed as a kernel argument. The first operand is the
; constant 12, the third operand is the i32 loaded from %out, and the result
; is stored back to %out.
; The assertions expect the loaded value to go through v_readfirstlane_b32
; into an SGPR before it is used by v_writelane_b32.
607 define amdgpu_kernel void @test_writelane_vreg_lane_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
608 ; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i32:
609 ; GFX802-SDAG: ; %bb.0:
610 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
611 ; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
612 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
613 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
614 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
615 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
616 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 4, v0
617 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
618 ; GFX802-SDAG-NEXT: flat_load_dword v0, v[0:1]
619 ; GFX802-SDAG-NEXT: s_load_dword s2, s[0:1], 0x0
620 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
621 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s2
622 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
623 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v0
624 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
625 ; GFX802-SDAG-NEXT: s_nop 2
626 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, 12, s2
627 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
628 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
629 ; GFX802-SDAG-NEXT: s_endpgm
631 ; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i32:
632 ; GFX1010-SDAG: ; %bb.0:
633 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
634 ; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
635 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
636 ; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:4
637 ; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
638 ; GFX1010-SDAG-NEXT: s_load_dword s2, s[0:1], 0x0
639 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
640 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s2
641 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
642 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s2, v0
643 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 0
644 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 12, s2
645 ; GFX1010-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
646 ; GFX1010-SDAG-NEXT: s_endpgm
648 ; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i32:
649 ; GFX1100-SDAG: ; %bb.0:
650 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
651 ; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
652 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
653 ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
654 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
655 ; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:4
656 ; GFX1100-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x0
657 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
658 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s2
659 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
660 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v0
661 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
662 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
663 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 12, s2
664 ; GFX1100-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
665 ; GFX1100-SDAG-NEXT: s_endpgm
667 ; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i32:
668 ; GFX802-GISEL: ; %bb.0:
669 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
670 ; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
671 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
672 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
673 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
674 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
675 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
676 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0
677 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
678 ; GFX802-GISEL-NEXT: flat_load_dword v0, v[0:1]
679 ; GFX802-GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
680 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
681 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
682 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
683 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
684 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
685 ; GFX802-GISEL-NEXT: s_nop 2
686 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, 12, s2
687 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
688 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
689 ; GFX802-GISEL-NEXT: s_endpgm
691 ; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i32:
692 ; GFX1010-GISEL: ; %bb.0:
693 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
694 ; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
695 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
696 ; GFX1010-GISEL-NEXT: global_load_dword v0, v0, s[2:3] offset:4
697 ; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
698 ; GFX1010-GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
699 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
700 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
701 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
702 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
703 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
704 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 12, s2
705 ; GFX1010-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
706 ; GFX1010-GISEL-NEXT: s_endpgm
708 ; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i32:
709 ; GFX1100-GISEL: ; %bb.0:
710 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
711 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
712 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
713 ; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
714 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
715 ; GFX1100-GISEL-NEXT: global_load_b32 v0, v0, s[2:3] offset:4
716 ; GFX1100-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x0
717 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
718 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
719 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
720 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
721 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
722 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
723 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 12, s2
724 ; GFX1100-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
725 ; GFX1100-GISEL-NEXT: s_endpgm
726 %tid = call i32 @llvm.amdgcn.workitem.id.x()
727 %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
728 %args = load <2 x i32>, ptr addrspace(1) %gep.in
729 %oldval = load i32, ptr addrspace(1) %out
730 %lane = extractelement <2 x i32> %args, i32 1
731 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 12, i32 %lane, i32 %oldval)
732 store i32 %writelane, ptr addrspace(1) %out, align 4
; Kernel test: 64-bit writelane whose lane select is loaded from memory per
; work-item instead of coming from a kernel argument.
; The autogenerated checks below expect, on every target and for both
; instruction selectors, that the loaded lane is moved into an SGPR with
; v_readfirstlane_b32 and that the i64 operation is emitted as two
; v_writelane_b32 instructions: one writing 12 and one writing 0.
736 define amdgpu_kernel void @test_writelane_vreg_lane_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
737 ; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i64:
738 ; GFX802-SDAG: ; %bb.0:
739 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
740 ; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
741 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
742 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
743 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
744 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
745 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0
746 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
747 ; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1]
748 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
749 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
750 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
751 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
752 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
753 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2
754 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
755 ; GFX802-SDAG-NEXT: s_nop 2
756 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, 0, s2
757 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, 12, s2
758 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
759 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
760 ; GFX802-SDAG-NEXT: s_endpgm
762 ; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i64:
763 ; GFX1010-SDAG: ; %bb.0:
764 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
765 ; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
766 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
767 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
768 ; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8
769 ; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
770 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
771 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
772 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
773 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
774 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0
775 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
776 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 0, s3
777 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 12, s3
778 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
779 ; GFX1010-SDAG-NEXT: s_endpgm
781 ; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i64:
782 ; GFX1100-SDAG: ; %bb.0:
783 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
784 ; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
785 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
786 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
787 ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
788 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
789 ; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8
790 ; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
791 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
792 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
793 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
794 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0
795 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
796 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
797 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 0, s3
798 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 12, s3
799 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
800 ; GFX1100-SDAG-NEXT: s_endpgm
802 ; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i64:
803 ; GFX802-GISEL: ; %bb.0:
804 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
805 ; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0
806 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
807 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
808 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
809 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
810 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
811 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0
812 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
813 ; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
814 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
815 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1
816 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0
817 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
818 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2
819 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
820 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
821 ; GFX802-GISEL-NEXT: s_nop 3
822 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, 12, s2
823 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, 0, s2
824 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
825 ; GFX802-GISEL-NEXT: s_endpgm
827 ; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i64:
828 ; GFX1010-GISEL: ; %bb.0:
829 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
830 ; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
831 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
832 ; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
833 ; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
834 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
835 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
836 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
837 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3
838 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
839 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
840 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 12, s2
841 ; GFX1010-GISEL-NEXT: v_writelane_b32 v2, 0, s2
842 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
843 ; GFX1010-GISEL-NEXT: s_endpgm
845 ; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i64:
846 ; GFX1100-GISEL: ; %bb.0:
847 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
848 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
849 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
850 ; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
851 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
852 ; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8
853 ; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
854 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
855 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
856 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3
857 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
858 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
859 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
860 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 12, s2
861 ; GFX1100-GISEL-NEXT: v_writelane_b32 v2, 0, s2
862 ; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
863 ; GFX1100-GISEL-NEXT: s_endpgm
; Index %in by the work-item id so each work-item loads its own <2 x i64>.
864 %tid = call i32 @llvm.amdgcn.workitem.id.x()
865 %gep.in = getelementptr <2 x i64>, ptr addrspace(1) %in, i32 %tid
866 %args = load <2 x i64>, ptr addrspace(1) %gep.in
; Current contents of %out, passed as the third operand of the intrinsic.
867 %oldval = load i64, ptr addrspace(1) %out
; Element 1 of the loaded vector supplies the lane; the intrinsic takes an
; i32 lane operand, hence the trunc.
868 %lane = extractelement <2 x i64> %args, i32 1
869 %lane32 = trunc i64 %lane to i32
; Operands: constant 12, the loaded lane, the old value. The result is
; stored back to %out.
870 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 12, i32 %lane32, i64 %oldval)
871 store i64 %writelane, ptr addrspace(1) %out, align 4
; Kernel test: double-precision writelane whose lane select is loaded from
; memory per work-item instead of coming from a kernel argument.
; The autogenerated checks below expect the loaded lane to be moved into a
; scalar register with v_readfirstlane_b32 and the f64 operation to be
; emitted as two v_writelane_b32 instructions: one writing 0 and one writing
; 0x40280000 (the high dword of double 12.0) from an SGPR.
; In the gfx802 checks the lane select of the SGPR-valued write is held in
; m0; the gfx1010 and gfx1100 checks use an ordinary SGPR for it.
875 define amdgpu_kernel void @test_writelane_vreg_lane_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
876 ; GFX802-SDAG-LABEL: test_writelane_vreg_lane_f64:
877 ; GFX802-SDAG: ; %bb.0:
878 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
879 ; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
880 ; GFX802-SDAG-NEXT: s_mov_b32 s4, 0x40280000
881 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
882 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
883 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
884 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
885 ; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0
886 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
887 ; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1]
888 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
889 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
890 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
891 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
892 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
893 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v2
894 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2
895 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
896 ; GFX802-SDAG-NEXT: s_nop 1
897 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s4, m0
898 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
899 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, 0, s2
900 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
901 ; GFX802-SDAG-NEXT: s_endpgm
903 ; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_f64:
904 ; GFX1010-SDAG: ; %bb.0:
905 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
906 ; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
907 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
908 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
909 ; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8
910 ; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
911 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
912 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
913 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
914 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
915 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0
916 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
917 ; GFX1010-SDAG-NEXT: s_mov_b32 s2, 0x40280000
918 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s2, s3
919 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 0, s3
920 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
921 ; GFX1010-SDAG-NEXT: s_endpgm
923 ; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_f64:
924 ; GFX1100-SDAG: ; %bb.0:
925 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
926 ; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
927 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
928 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
929 ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
930 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
931 ; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8
932 ; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
933 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
934 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
935 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
936 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0
937 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
938 ; GFX1100-SDAG-NEXT: s_mov_b32 s2, 0x40280000
939 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
940 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s2, s3
941 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 0, s3
942 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
943 ; GFX1100-SDAG-NEXT: s_endpgm
945 ; GFX802-GISEL-LABEL: test_writelane_vreg_lane_f64:
946 ; GFX802-GISEL: ; %bb.0:
947 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
948 ; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0
949 ; GFX802-GISEL-NEXT: s_mov_b32 s4, 0x40280000
950 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
951 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
952 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
953 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
954 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
955 ; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0
956 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
957 ; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
958 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
959 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1
960 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0
961 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
962 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2
963 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
964 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
965 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s2
966 ; GFX802-GISEL-NEXT: s_nop 2
967 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, 0, s2
968 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0
969 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
970 ; GFX802-GISEL-NEXT: s_endpgm
972 ; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_f64:
973 ; GFX1010-GISEL: ; %bb.0:
974 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
975 ; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
976 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
977 ; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
978 ; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
979 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
980 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
981 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
982 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3
983 ; GFX1010-GISEL-NEXT: s_mov_b32 s3, 0x40280000
984 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
985 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
986 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 0, s2
987 ; GFX1010-GISEL-NEXT: v_writelane_b32 v2, s3, s2
988 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
989 ; GFX1010-GISEL-NEXT: s_endpgm
991 ; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_f64:
992 ; GFX1100-GISEL: ; %bb.0:
993 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
994 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
995 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
996 ; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
997 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
998 ; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8
999 ; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
1000 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1001 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
1002 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3
1003 ; GFX1100-GISEL-NEXT: s_mov_b32 s3, 0x40280000
1004 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
1005 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
1006 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1007 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 0, s2
1008 ; GFX1100-GISEL-NEXT: v_writelane_b32 v2, s3, s2
1009 ; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
1010 ; GFX1100-GISEL-NEXT: s_endpgm
; Index %in by the work-item id so each work-item loads its own <2 x double>.
1011 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1012 %gep.in = getelementptr <2 x double>, ptr addrspace(1) %in, i32 %tid
1013 %args = load <2 x double>, ptr addrspace(1) %gep.in
; Current contents of %out, passed as the third operand of the intrinsic.
1014 %oldval = load double, ptr addrspace(1) %out
; Element 1 of the loaded vector supplies the lane. Its bit pattern is
; reinterpreted as i64 and truncated because the lane operand is i32.
1015 %lane = extractelement <2 x double> %args, i32 1
1016 %lane_cast = bitcast double %lane to i64
1017 %lane32 = trunc i64 %lane_cast to i32
; Operands: constant 12.0, the loaded lane, the old value. The result is
; stored back to %out.
1018 %writelane = call double @llvm.amdgcn.writelane.f64(double 12.0, i32 %lane32, double %oldval)
1019 store double %writelane, ptr addrspace(1) %out, align 4
; Kernel test: 32-bit writelane whose first (value) operand is produced in m0
; by inline assembly, with the lane select taken from kernel argument %src1.
; In the gfx802 checks below, m0 is first copied to s4 and m0 is then
; reloaded with the lane select, giving "v_writelane_b32 v2, s4, m0".
; In the gfx1010 and gfx1100 checks m0 is used directly as the value operand
; and the lane select stays in s2.
1023 define amdgpu_kernel void @test_writelane_m0_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
1024 ; GFX802-SDAG-LABEL: test_writelane_m0_sreg_i32:
1025 ; GFX802-SDAG: ; %bb.0:
1026 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1027 ; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
1028 ; GFX802-SDAG-NEXT: ;;#ASMSTART
1029 ; GFX802-SDAG-NEXT: s_mov_b32 m0, -1
1030 ; GFX802-SDAG-NEXT: ;;#ASMEND
1031 ; GFX802-SDAG-NEXT: s_mov_b32 s4, m0
1032 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1033 ; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1034 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s2
1035 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1036 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1037 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1038 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
1039 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
1040 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1041 ; GFX802-SDAG-NEXT: s_endpgm
1043 ; GFX1010-SDAG-LABEL: test_writelane_m0_sreg_i32:
1044 ; GFX1010-SDAG: ; %bb.0:
1045 ; GFX1010-SDAG-NEXT: s_clause 0x1
1046 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1047 ; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
1048 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1049 ; GFX1010-SDAG-NEXT: ;;#ASMSTART
1050 ; GFX1010-SDAG-NEXT: s_mov_b32 m0, -1
1051 ; GFX1010-SDAG-NEXT: ;;#ASMEND
1052 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1053 ; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1054 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1055 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3
1056 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, m0, s2
1057 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1058 ; GFX1010-SDAG-NEXT: s_endpgm
1060 ; GFX1100-SDAG-LABEL: test_writelane_m0_sreg_i32:
1061 ; GFX1100-SDAG: ; %bb.0:
1062 ; GFX1100-SDAG-NEXT: s_clause 0x1
1063 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1064 ; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8
1065 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1066 ; GFX1100-SDAG-NEXT: ;;#ASMSTART
1067 ; GFX1100-SDAG-NEXT: s_mov_b32 m0, -1
1068 ; GFX1100-SDAG-NEXT: ;;#ASMEND
1069 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1070 ; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0
1071 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1072 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3
1073 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, m0, s2
1074 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
1075 ; GFX1100-SDAG-NEXT: s_endpgm
1077 ; GFX802-GISEL-LABEL: test_writelane_m0_sreg_i32:
1078 ; GFX802-GISEL: ; %bb.0:
1079 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1080 ; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
1081 ; GFX802-GISEL-NEXT: ;;#ASMSTART
1082 ; GFX802-GISEL-NEXT: s_mov_b32 m0, -1
1083 ; GFX802-GISEL-NEXT: ;;#ASMEND
1084 ; GFX802-GISEL-NEXT: s_mov_b32 s4, m0
1085 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1086 ; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1087 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s2
1088 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1089 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1090 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1091 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
1092 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0
1093 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1094 ; GFX802-GISEL-NEXT: s_endpgm
1096 ; GFX1010-GISEL-LABEL: test_writelane_m0_sreg_i32:
1097 ; GFX1010-GISEL: ; %bb.0:
1098 ; GFX1010-GISEL-NEXT: s_clause 0x1
1099 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1100 ; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
1101 ; GFX1010-GISEL-NEXT: ;;#ASMSTART
1102 ; GFX1010-GISEL-NEXT: s_mov_b32 m0, -1
1103 ; GFX1010-GISEL-NEXT: ;;#ASMEND
1104 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1105 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1106 ; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1107 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1108 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3
1109 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, m0, s2
1110 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1111 ; GFX1010-GISEL-NEXT: s_endpgm
1113 ; GFX1100-GISEL-LABEL: test_writelane_m0_sreg_i32:
1114 ; GFX1100-GISEL: ; %bb.0:
1115 ; GFX1100-GISEL-NEXT: s_clause 0x1
1116 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1117 ; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8
1118 ; GFX1100-GISEL-NEXT: ;;#ASMSTART
1119 ; GFX1100-GISEL-NEXT: s_mov_b32 m0, -1
1120 ; GFX1100-GISEL-NEXT: ;;#ASMEND
1121 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1122 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1123 ; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0
1124 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1125 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3
1126 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, m0, s2
1127 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
1128 ; GFX1100-GISEL-NEXT: s_endpgm
; Current contents of %out, passed as the third operand of the intrinsic.
1129 %oldval = load i32, ptr addrspace(1) %out
; The "={m0}" output constraint pins the asm result to the m0 register.
1130 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
; Operands: the m0 value, the %src1 lane select, the old value. The result is
; stored back to %out.
1131 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %m0, i32 %src1, i32 %oldval)
1132 store i32 %writelane, ptr addrspace(1) %out, align 4
; Kernel test: 32-bit writelane with the constant 32 as the lane select and
; kernel argument %src0 as the value operand.
; The autogenerated checks below expect the constant to appear directly as
; the last operand of a single v_writelane_b32 on every target and for both
; instruction selectors.
1136 define amdgpu_kernel void @test_writelane_imm_i32(ptr addrspace(1) %out, i32 %src0) #1 {
1137 ; GFX802-SDAG-LABEL: test_writelane_imm_i32:
1138 ; GFX802-SDAG: ; %bb.0:
1139 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1140 ; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
1141 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1142 ; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1143 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1144 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1145 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1146 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
1147 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, 32
1148 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1149 ; GFX802-SDAG-NEXT: s_endpgm
1151 ; GFX1010-SDAG-LABEL: test_writelane_imm_i32:
1152 ; GFX1010-SDAG: ; %bb.0:
1153 ; GFX1010-SDAG-NEXT: s_clause 0x1
1154 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1155 ; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
1156 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1157 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1158 ; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1159 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1160 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3
1161 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1162 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1163 ; GFX1010-SDAG-NEXT: s_endpgm
1165 ; GFX1100-SDAG-LABEL: test_writelane_imm_i32:
1166 ; GFX1100-SDAG: ; %bb.0:
1167 ; GFX1100-SDAG-NEXT: s_clause 0x1
1168 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1169 ; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8
1170 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1171 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1172 ; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0
1173 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1174 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3
1175 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1176 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
1177 ; GFX1100-SDAG-NEXT: s_endpgm
1179 ; GFX802-GISEL-LABEL: test_writelane_imm_i32:
1180 ; GFX802-GISEL: ; %bb.0:
1181 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1182 ; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
1183 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1184 ; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1185 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1186 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1187 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1188 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
1189 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, 32
1190 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1191 ; GFX802-GISEL-NEXT: s_endpgm
1193 ; GFX1010-GISEL-LABEL: test_writelane_imm_i32:
1194 ; GFX1010-GISEL: ; %bb.0:
1195 ; GFX1010-GISEL-NEXT: s_clause 0x1
1196 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1197 ; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
1198 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1199 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1200 ; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1201 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1202 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3
1203 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1204 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1205 ; GFX1010-GISEL-NEXT: s_endpgm
1207 ; GFX1100-GISEL-LABEL: test_writelane_imm_i32:
1208 ; GFX1100-GISEL: ; %bb.0:
1209 ; GFX1100-GISEL-NEXT: s_clause 0x1
1210 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1211 ; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8
1212 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1213 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1214 ; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0
1215 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1216 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3
1217 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1218 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
1219 ; GFX1100-GISEL-NEXT: s_endpgm
; Current contents of %out, passed as the third operand of the intrinsic.
1220 %oldval = load i32, ptr addrspace(1) %out
; Operands: %src0, the constant lane select 32, the old value. The result is
; stored back to %out.
1221 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 32, i32 %oldval) #0
1222 store i32 %writelane, ptr addrspace(1) %out, align 4
; Kernel test: 64-bit writelane with the constant 32 as the lane select and
; kernel argument %src0 as the value operand.
; The autogenerated checks below expect the i64 operation to be emitted as
; two v_writelane_b32 instructions (sources s2 and s3), each taking the
; constant 32 directly as its lane operand.
1226 define amdgpu_kernel void @test_writelane_imm_i64(ptr addrspace(1) %out, i64 %src0) #1 {
1227 ; GFX802-SDAG-LABEL: test_writelane_imm_i64:
1228 ; GFX802-SDAG: ; %bb.0:
1229 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1230 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1231 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1232 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1233 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1234 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1235 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
1236 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
1237 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1238 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1239 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1240 ; GFX802-SDAG-NEXT: s_endpgm
1242 ; GFX1010-SDAG-LABEL: test_writelane_imm_i64:
1243 ; GFX1010-SDAG: ; %bb.0:
1244 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1245 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1246 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1247 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1248 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1249 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
1250 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
1251 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1252 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1253 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1254 ; GFX1010-SDAG-NEXT: s_endpgm
1256 ; GFX1100-SDAG-LABEL: test_writelane_imm_i64:
1257 ; GFX1100-SDAG: ; %bb.0:
1258 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1259 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1260 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1261 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1262 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1263 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
1264 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
1265 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1266 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1267 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1268 ; GFX1100-SDAG-NEXT: s_endpgm
1270 ; GFX802-GISEL-LABEL: test_writelane_imm_i64:
1271 ; GFX802-GISEL: ; %bb.0:
1272 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1273 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1274 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1275 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1276 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1277 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1278 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
1279 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
1280 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1281 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1282 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1283 ; GFX802-GISEL-NEXT: s_endpgm
1285 ; GFX1010-GISEL-LABEL: test_writelane_imm_i64:
1286 ; GFX1010-GISEL: ; %bb.0:
1287 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1288 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1289 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1290 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1291 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1292 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
1293 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
1294 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1295 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1296 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1297 ; GFX1010-GISEL-NEXT: s_endpgm
1299 ; GFX1100-GISEL-LABEL: test_writelane_imm_i64:
1300 ; GFX1100-GISEL: ; %bb.0:
1301 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1302 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1303 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1304 ; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1305 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1306 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
1307 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
1308 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1309 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1310 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1311 ; GFX1100-GISEL-NEXT: s_endpgm
; Current contents of %out, passed as the third operand of the intrinsic.
1312 %oldval = load i64, ptr addrspace(1) %out
; Operands: %src0, the constant lane select 32, the old value. The result is
; stored back to %out.
1313 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 32, i64 %oldval) #0
1314 store i64 %writelane, ptr addrspace(1) %out, align 4
; Kernel test: double-precision writelane with the constant 32 as the lane
; select and kernel argument %src0 as the value operand.
; The autogenerated checks below expect the f64 operation to be emitted as
; two v_writelane_b32 instructions (sources s2 and s3), each taking the
; constant 32 directly as its lane operand.
1318 define amdgpu_kernel void @test_writelane_imm_f64(ptr addrspace(1) %out, double %src0) #1 {
1319 ; GFX802-SDAG-LABEL: test_writelane_imm_f64:
1320 ; GFX802-SDAG: ; %bb.0:
1321 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1322 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1323 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1324 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1325 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1326 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1327 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
1328 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
1329 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1330 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1331 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1332 ; GFX802-SDAG-NEXT: s_endpgm
1334 ; GFX1010-SDAG-LABEL: test_writelane_imm_f64:
1335 ; GFX1010-SDAG: ; %bb.0:
1336 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1337 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1338 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1339 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1340 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1341 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
1342 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
1343 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1344 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1345 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1346 ; GFX1010-SDAG-NEXT: s_endpgm
1348 ; GFX1100-SDAG-LABEL: test_writelane_imm_f64:
1349 ; GFX1100-SDAG: ; %bb.0:
1350 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1351 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1352 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1353 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1354 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1355 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
1356 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
1357 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1358 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1359 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1360 ; GFX1100-SDAG-NEXT: s_endpgm
1362 ; GFX802-GISEL-LABEL: test_writelane_imm_f64:
1363 ; GFX802-GISEL: ; %bb.0:
1364 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1365 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1366 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1367 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1368 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1369 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1370 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
1371 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
1372 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1373 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1374 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1375 ; GFX802-GISEL-NEXT: s_endpgm
1377 ; GFX1010-GISEL-LABEL: test_writelane_imm_f64:
1378 ; GFX1010-GISEL: ; %bb.0:
1379 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1380 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1381 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1382 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1383 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1384 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
1385 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
1386 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1387 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1388 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1389 ; GFX1010-GISEL-NEXT: s_endpgm
1391 ; GFX1100-GISEL-LABEL: test_writelane_imm_f64:
1392 ; GFX1100-GISEL: ; %bb.0:
1393 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1394 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1395 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1396 ; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1397 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1398 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
1399 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
1400 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1401 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1402 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1403 ; GFX1100-GISEL-NEXT: s_endpgm
; Current contents of %out, passed as the third operand of the intrinsic.
1404 %oldval = load double, ptr addrspace(1) %out
; Operands: %src0, the constant lane select 32, the old value. The result is
; stored back to %out.
1405 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 32, double %oldval) #0
1406 store double %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.i32 where the old-value operand is the
; kernel argument %oldval rather than a value loaded from %out.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output copies the loaded %oldval scalar into a VGPR (v_mov_b32)
; and then emits one v_writelane_b32. The gfx802 checks move the lane select
; into m0 first, while the gfx1010/gfx1100 checks use it directly as an SGPR.
1410 define amdgpu_kernel void @test_writelane_sreg_oldval_i32(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
1411 ; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1412 ; GFX802-SDAG: ; %bb.0:
1413 ; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
1414 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
1415 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1416 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s4
1417 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s3
1418 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1419 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0
1420 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1421 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1422 ; GFX802-SDAG-NEXT: s_endpgm
1424 ; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1425 ; GFX1010-SDAG: ; %bb.0:
1426 ; GFX1010-SDAG-NEXT: s_clause 0x1
1427 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
1428 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
1429 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1430 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1431 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
1432 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1433 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1434 ; GFX1010-SDAG-NEXT: s_endpgm
1436 ; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1437 ; GFX1100-SDAG: ; %bb.0:
1438 ; GFX1100-SDAG-NEXT: s_clause 0x1
1439 ; GFX1100-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x0
1440 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
1441 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1442 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1443 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s6
1444 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1445 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
1446 ; GFX1100-SDAG-NEXT: s_endpgm
1448 ; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1449 ; GFX802-GISEL: ; %bb.0:
1450 ; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0
1451 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
1452 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1453 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s4
1454 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s3
1455 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1456 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0
1457 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1458 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1459 ; GFX802-GISEL-NEXT: s_endpgm
1461 ; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1462 ; GFX1010-GISEL: ; %bb.0:
1463 ; GFX1010-GISEL-NEXT: s_clause 0x1
1464 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0
1465 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
1466 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1467 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1468 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
1469 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1470 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1471 ; GFX1010-GISEL-NEXT: s_endpgm
1473 ; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1474 ; GFX1100-GISEL: ; %bb.0:
1475 ; GFX1100-GISEL-NEXT: s_clause 0x1
1476 ; GFX1100-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x0
1477 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
1478 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1479 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1480 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s6
1481 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1482 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
1483 ; GFX1100-GISEL-NEXT: s_endpgm
1484 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
1485 store i32 %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.i64 where the 64-bit old-value operand
; is the kernel argument %oldval rather than a value loaded from %out.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output copies both 32-bit halves of %oldval into VGPRs and
; emits two v_writelane_b32 instructions, one per half, sharing the same lane
; select (via m0 in the gfx802 checks, an SGPR in the gfx1010/gfx1100 checks).
; NOTE(review): the 8-byte store is marked align 4 - presumably intentional
; for this test; confirm before changing.
1489 define amdgpu_kernel void @test_writelane_sreg_oldval_i64(i64 inreg %oldval, ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
1490 ; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1491 ; GFX802-SDAG: ; %bb.0:
1492 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1493 ; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1494 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1495 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1496 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1497 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
1498 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1499 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
1500 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
1501 ; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0
1502 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
1503 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1504 ; GFX802-SDAG-NEXT: s_endpgm
1506 ; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1507 ; GFX1010-SDAG: ; %bb.0:
1508 ; GFX1010-SDAG-NEXT: s_clause 0x2
1509 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1510 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1511 ; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1512 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1513 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1514 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1
1515 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0
1516 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6
1517 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s4, s6
1518 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1519 ; GFX1010-SDAG-NEXT: s_endpgm
1521 ; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1522 ; GFX1100-SDAG: ; %bb.0:
1523 ; GFX1100-SDAG-NEXT: s_clause 0x2
1524 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1525 ; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1526 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18
1527 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1528 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1529 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1
1530 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0
1531 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4
1532 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4
1533 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3]
1534 ; GFX1100-SDAG-NEXT: s_endpgm
1536 ; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1537 ; GFX802-GISEL: ; %bb.0:
1538 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1539 ; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1540 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1541 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1542 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1543 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1544 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
1545 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
1546 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0
1547 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
1548 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s3
1549 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1550 ; GFX802-GISEL-NEXT: s_endpgm
1552 ; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1553 ; GFX1010-GISEL: ; %bb.0:
1554 ; GFX1010-GISEL-NEXT: s_clause 0x2
1555 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1556 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1557 ; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1558 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1559 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1560 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0
1561 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1
1562 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6
1563 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6
1564 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1565 ; GFX1010-GISEL-NEXT: s_endpgm
1567 ; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1568 ; GFX1100-GISEL: ; %bb.0:
1569 ; GFX1100-GISEL-NEXT: s_clause 0x2
1570 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1571 ; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1572 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18
1573 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1574 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1575 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0
1576 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1
1577 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4
1578 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4
1579 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3]
1580 ; GFX1100-GISEL-NEXT: s_endpgm
1581 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
1582 store i64 %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.f64 where the double old-value operand
; is the kernel argument %oldval rather than a value loaded from %out.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output copies both 32-bit halves of %oldval into VGPRs and
; emits two v_writelane_b32 instructions, one per half, sharing the same lane
; select (via m0 in the gfx802 checks, an SGPR in the gfx1010/gfx1100 checks).
; NOTE(review): the 8-byte store is marked align 4 - presumably intentional
; for this test; confirm before changing.
1586 define amdgpu_kernel void @test_writelane_sreg_oldval_f64(double inreg %oldval, ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
1587 ; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1588 ; GFX802-SDAG: ; %bb.0:
1589 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1590 ; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1591 ; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1592 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1593 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1594 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
1595 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1596 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
1597 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
1598 ; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0
1599 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
1600 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1601 ; GFX802-SDAG-NEXT: s_endpgm
1603 ; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1604 ; GFX1010-SDAG: ; %bb.0:
1605 ; GFX1010-SDAG-NEXT: s_clause 0x2
1606 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1607 ; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1608 ; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1609 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1610 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1611 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1
1612 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0
1613 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6
1614 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s4, s6
1615 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1616 ; GFX1010-SDAG-NEXT: s_endpgm
1618 ; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1619 ; GFX1100-SDAG: ; %bb.0:
1620 ; GFX1100-SDAG-NEXT: s_clause 0x2
1621 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1622 ; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1623 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18
1624 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1625 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1626 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1
1627 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0
1628 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4
1629 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4
1630 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3]
1631 ; GFX1100-SDAG-NEXT: s_endpgm
1633 ; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1634 ; GFX802-GISEL: ; %bb.0:
1635 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1636 ; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1637 ; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1638 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1639 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1640 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1641 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
1642 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
1643 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0
1644 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
1645 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s3
1646 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1647 ; GFX802-GISEL-NEXT: s_endpgm
1649 ; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1650 ; GFX1010-GISEL: ; %bb.0:
1651 ; GFX1010-GISEL-NEXT: s_clause 0x2
1652 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1653 ; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1654 ; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1655 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1656 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1657 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0
1658 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1
1659 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6
1660 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6
1661 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1662 ; GFX1010-GISEL-NEXT: s_endpgm
1664 ; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1665 ; GFX1100-GISEL: ; %bb.0:
1666 ; GFX1100-GISEL-NEXT: s_clause 0x2
1667 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1668 ; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1669 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18
1670 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1671 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1672 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0
1673 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1
1674 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4
1675 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4
1676 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3]
1677 ; GFX1100-GISEL-NEXT: s_endpgm
1678 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
1679 store double %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.i32 where the old-value operand is the
; constant 42 instead of a register or loaded value.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output materialises 42 into a VGPR with v_mov_b32 before the
; single v_writelane_b32. The gfx802 checks move the lane select into m0
; first, while the gfx1010/gfx1100 checks use it directly as an SGPR.
1683 define amdgpu_kernel void @test_writelane_imm_oldval_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
1684 ; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i32:
1685 ; GFX802-SDAG: ; %bb.0:
1686 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1687 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, 42
1688 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1689 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s3
1690 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1691 ; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0
1692 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1693 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1694 ; GFX802-SDAG-NEXT: s_endpgm
1696 ; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i32:
1697 ; GFX1010-SDAG: ; %bb.0:
1698 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1699 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42
1700 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1701 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1702 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1703 ; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1704 ; GFX1010-SDAG-NEXT: s_endpgm
1706 ; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i32:
1707 ; GFX1100-SDAG: ; %bb.0:
1708 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1709 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42
1710 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1711 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1712 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1713 ; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
1714 ; GFX1100-SDAG-NEXT: s_endpgm
1716 ; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i32:
1717 ; GFX802-GISEL: ; %bb.0:
1718 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1719 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, 42
1720 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1721 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s3
1722 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1723 ; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0
1724 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1725 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1726 ; GFX802-GISEL-NEXT: s_endpgm
1728 ; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i32:
1729 ; GFX1010-GISEL: ; %bb.0:
1730 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1731 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42
1732 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1733 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1734 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1735 ; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1736 ; GFX1010-GISEL-NEXT: s_endpgm
1738 ; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i32:
1739 ; GFX1100-GISEL: ; %bb.0:
1740 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1741 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42
1742 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1743 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1744 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1745 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
1746 ; GFX1100-GISEL-NEXT: s_endpgm
1747 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 42)
1748 store i32 %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.i64 where the old-value operand is the
; 64-bit constant 42 instead of a register or loaded value.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output materialises the constant as two VGPRs (42 and 0) and
; emits two v_writelane_b32 instructions, one per 32-bit half, sharing the
; same lane select (via m0 in the gfx802 checks, an SGPR otherwise).
; NOTE(review): the 8-byte store is marked align 4 - presumably intentional
; for this test; confirm before changing.
1752 define amdgpu_kernel void @test_writelane_imm_oldval_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
1753 ; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i64:
1754 ; GFX802-SDAG: ; %bb.0:
1755 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1756 ; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
1757 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0
1758 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 42
1759 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1760 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1761 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
1762 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1763 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
1764 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
1765 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1766 ; GFX802-SDAG-NEXT: s_endpgm
1768 ; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i64:
1769 ; GFX1010-SDAG: ; %bb.0:
1770 ; GFX1010-SDAG-NEXT: s_clause 0x1
1771 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1772 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
1773 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1774 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42
1775 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1776 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1777 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1778 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1779 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1780 ; GFX1010-SDAG-NEXT: s_endpgm
1782 ; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i64:
1783 ; GFX1100-SDAG: ; %bb.0:
1784 ; GFX1100-SDAG-NEXT: s_clause 0x1
1785 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1786 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10
1787 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1788 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42
1789 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1790 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1791 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1792 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1793 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1794 ; GFX1100-SDAG-NEXT: s_endpgm
1796 ; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i64:
1797 ; GFX802-GISEL: ; %bb.0:
1798 ; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
1799 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1800 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 42
1801 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0
1802 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1803 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
1804 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1805 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
1806 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
1807 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1808 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1809 ; GFX802-GISEL-NEXT: s_endpgm
1811 ; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i64:
1812 ; GFX1010-GISEL: ; %bb.0:
1813 ; GFX1010-GISEL-NEXT: s_clause 0x1
1814 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1815 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
1816 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42
1817 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1818 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1819 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1820 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1821 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4
1822 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1823 ; GFX1010-GISEL-NEXT: s_endpgm
1825 ; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i64:
1826 ; GFX1100-GISEL: ; %bb.0:
1827 ; GFX1100-GISEL-NEXT: s_clause 0x1
1828 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1829 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10
1830 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42
1831 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1832 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1833 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1834 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1835 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4
1836 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1837 ; GFX1100-GISEL-NEXT: s_endpgm
1838 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 42)
1839 store i64 %writelane, ptr addrspace(1) %out, align 4
; Kernel test for llvm.amdgcn.writelane.f64 where the old-value operand is the
; constant double 42.0 instead of a register or loaded value.
; %src0 is the value operand and %src1 the lane-select operand of the call;
; the result is stored to %out.
; The expected output materialises the constant as two VGPRs (0 and
; 0x40450000) and emits two v_writelane_b32 instructions, one per 32-bit half,
; sharing the same lane select (via m0 in the gfx802 checks, an SGPR otherwise).
; NOTE(review): the 8-byte store is marked align 4 - presumably intentional
; for this test; confirm before changing.
1843 define amdgpu_kernel void @test_writelane_imm_oldval_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
1844 ; GFX802-SDAG-LABEL: test_writelane_imm_oldval_f64:
1845 ; GFX802-SDAG: ; %bb.0:
1846 ; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1847 ; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
1848 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1849 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 0
1850 ; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1851 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1852 ; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
1853 ; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1854 ; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
1855 ; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
1856 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1857 ; GFX802-SDAG-NEXT: s_endpgm
1859 ; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_f64:
1860 ; GFX1010-SDAG: ; %bb.0:
1861 ; GFX1010-SDAG-NEXT: s_clause 0x1
1862 ; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1863 ; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
1864 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1865 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 0
1866 ; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1867 ; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1868 ; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1869 ; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1870 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1871 ; GFX1010-SDAG-NEXT: s_endpgm
1873 ; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_f64:
1874 ; GFX1100-SDAG: ; %bb.0:
1875 ; GFX1100-SDAG-NEXT: s_clause 0x1
1876 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1877 ; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10
1878 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1879 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
1880 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1881 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1882 ; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1883 ; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1884 ; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1885 ; GFX1100-SDAG-NEXT: s_endpgm
1887 ; GFX802-GISEL-LABEL: test_writelane_imm_oldval_f64:
1888 ; GFX802-GISEL: ; %bb.0:
1889 ; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
1890 ; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1891 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 0
1892 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
1893 ; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1894 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
1895 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1896 ; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
1897 ; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
1898 ; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1899 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1900 ; GFX802-GISEL-NEXT: s_endpgm
1902 ; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_f64:
1903 ; GFX1010-GISEL: ; %bb.0:
1904 ; GFX1010-GISEL-NEXT: s_clause 0x1
1905 ; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1906 ; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
1907 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
1908 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
1909 ; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1910 ; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1911 ; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1912 ; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4
1913 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1914 ; GFX1010-GISEL-NEXT: s_endpgm
1916 ; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_f64:
1917 ; GFX1100-GISEL: ; %bb.0:
1918 ; GFX1100-GISEL-NEXT: s_clause 0x1
1919 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1920 ; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10
1921 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
1922 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
1923 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1924 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1925 ; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1926 ; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4
1927 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1928 ; GFX1100-GISEL-NEXT: s_endpgm
1929 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double 42.0)
1930 store double %writelane, ptr addrspace(1) %out, align 4
; Non-kernel function test for llvm.amdgcn.writelane.f16.
; The old value is loaded as a half from %out, %src is the value operand and
; %src1 the lane-select operand of the call; the result is stored back to %out.
; In the expected output both %src and %src1 are read from VGPRs with
; v_readfirstlane_b32 before the single v_writelane_b32, and the old value and
; result use 16-bit load/store instructions. The gfx802 checks place the lane
; select in m0; the gfx1010/gfx1100 checks use an SGPR operand.
1934 define void @test_writelane_half(ptr addrspace(1) %out, half %src, i32 %src1) {
1935 ; GFX802-SDAG-LABEL: test_writelane_half:
1936 ; GFX802-SDAG: ; %bb.0:
1937 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938 ; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
1939 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
1940 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
1941 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
1942 ; GFX802-SDAG-NEXT: s_nop 1
1943 ; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
1944 ; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
1945 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
1946 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
1948 ; GFX1010-SDAG-LABEL: test_writelane_half:
1949 ; GFX1010-SDAG: ; %bb.0:
1950 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
1952 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
1953 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
1954 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
1955 ; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
1956 ; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
1957 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
1959 ; GFX1100-SDAG-LABEL: test_writelane_half:
1960 ; GFX1100-SDAG: ; %bb.0:
1961 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1962 ; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
1963 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
1964 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
1965 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
1966 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1967 ; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
1968 ; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
1969 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
1971 ; GFX802-GISEL-LABEL: test_writelane_half:
1972 ; GFX802-GISEL: ; %bb.0:
1973 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974 ; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
1975 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
1976 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
1977 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
1978 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
1979 ; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
1980 ; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
1981 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
1982 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
1984 ; GFX1010-GISEL-LABEL: test_writelane_half:
1985 ; GFX1010-GISEL: ; %bb.0:
1986 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1987 ; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
1988 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
1989 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
1990 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
1991 ; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
1992 ; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
1993 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
1995 ; GFX1100-GISEL-LABEL: test_writelane_half:
1996 ; GFX1100-GISEL: ; %bb.0:
1997 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998 ; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
1999 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2000 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2001 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2002 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2003 ; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2004 ; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2005 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2006 %oldval = load half, ptr addrspace(1) %out
2007 %writelane = call half @llvm.amdgcn.writelane.f16(half %src, i32 %src1, half %oldval)
2008 store half %writelane, ptr addrspace(1) %out, align 4
; Non-kernel function test for llvm.amdgcn.writelane.f32.
; The old value is loaded as a float from %out, %src is the value operand and
; %src1 the lane-select operand of the call; the result is stored back to %out.
; In the expected output both %src and %src1 are read from VGPRs with
; v_readfirstlane_b32 before the single v_writelane_b32, and the old value and
; result use 32-bit load/store instructions. The gfx802 checks place the lane
; select in m0; the gfx1010/gfx1100 checks use an SGPR operand.
2012 define void @test_writelane_float(ptr addrspace(1) %out, float %src, i32 %src1) {
2013 ; GFX802-SDAG-LABEL: test_writelane_float:
2014 ; GFX802-SDAG: ; %bb.0:
2015 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016 ; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1]
2017 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2018 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2019 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2020 ; GFX802-SDAG-NEXT: s_nop 1
2021 ; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2022 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4
2023 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2024 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2026 ; GFX1010-SDAG-LABEL: test_writelane_float:
2027 ; GFX1010-SDAG: ; %bb.0:
2028 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2029 ; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off
2030 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2031 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2032 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2033 ; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2034 ; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off
2035 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2037 ; GFX1100-SDAG-LABEL: test_writelane_float:
2038 ; GFX1100-SDAG: ; %bb.0:
2039 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2040 ; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off
2041 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2042 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2043 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2044 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2045 ; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2046 ; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off
2047 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2049 ; GFX802-GISEL-LABEL: test_writelane_float:
2050 ; GFX802-GISEL: ; %bb.0:
2051 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1]
2053 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2054 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2055 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2056 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2057 ; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2058 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4
2059 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2060 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2062 ; GFX1010-GISEL-LABEL: test_writelane_float:
2063 ; GFX1010-GISEL: ; %bb.0:
2064 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065 ; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off
2066 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2067 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2068 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2069 ; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2070 ; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off
2071 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2073 ; GFX1100-GISEL-LABEL: test_writelane_float:
2074 ; GFX1100-GISEL: ; %bb.0:
2075 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2076 ; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off
2077 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2078 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2079 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2081 ; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2082 ; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off
2083 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2084 %oldval = load float, ptr addrspace(1) %out
2085 %writelane = call float @llvm.amdgcn.writelane.f32(float %src, i32 %src1, float %oldval)
2086 store float %writelane, ptr addrspace(1) %out, align 4
; bfloat variant: loads the current bfloat at %out, passes %src, %src1 and the
; loaded value to llvm.amdgcn.writelane.bf16, and stores the result to %out.
; Every run line expects a 16-bit load/store around a single v_writelane_b32
; whose scalar operands come (directly or via m0) from v_readfirstlane_b32 of
; the VGPR arguments v2 and v3.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2090 define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1) {
2091 ; GFX802-SDAG-LABEL: test_writelane_bfloat:
2092 ; GFX802-SDAG: ; %bb.0:
2093 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2094 ; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
2095 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2096 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2097 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2098 ; GFX802-SDAG-NEXT: s_nop 1
2099 ; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2100 ; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
2101 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2102 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2104 ; GFX1010-SDAG-LABEL: test_writelane_bfloat:
2105 ; GFX1010-SDAG: ; %bb.0:
2106 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2107 ; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
2108 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2109 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2110 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2111 ; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2112 ; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
2113 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2115 ; GFX1100-SDAG-LABEL: test_writelane_bfloat:
2116 ; GFX1100-SDAG: ; %bb.0:
2117 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2118 ; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
2119 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2120 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2121 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2122 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2123 ; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2124 ; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
2125 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2127 ; GFX802-GISEL-LABEL: test_writelane_bfloat:
2128 ; GFX802-GISEL: ; %bb.0:
2129 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2130 ; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
2131 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2132 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2133 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2134 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2135 ; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2136 ; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
2137 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2138 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2140 ; GFX1010-GISEL-LABEL: test_writelane_bfloat:
2141 ; GFX1010-GISEL: ; %bb.0:
2142 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2143 ; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
2144 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2145 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2146 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2147 ; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2148 ; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
2149 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2151 ; GFX1100-GISEL-LABEL: test_writelane_bfloat:
2152 ; GFX1100-GISEL: ; %bb.0:
2153 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2154 ; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
2155 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2156 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2157 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2158 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2159 ; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2160 ; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2161 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2162 %oldval = load bfloat, ptr addrspace(1) %out
2163 %writelane = call bfloat @llvm.amdgcn.writelane.bf16(bfloat %src, i32 %src1, bfloat %oldval)
2164 store bfloat %writelane, ptr addrspace(1) %out, align 4
; i16 variant: loads the current i16 at %out, passes %src, %src1 and the loaded
; value to llvm.amdgcn.writelane.i16, and stores the result to %out.
; Every run line expects a 16-bit load/store around a single v_writelane_b32
; whose scalar operands come (directly or via m0) from v_readfirstlane_b32 of
; the VGPR arguments v2 and v3.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2168 define void @test_writelane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
2169 ; GFX802-SDAG-LABEL: test_writelane_i16:
2170 ; GFX802-SDAG: ; %bb.0:
2171 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172 ; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
2173 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2174 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2175 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2176 ; GFX802-SDAG-NEXT: s_nop 1
2177 ; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2178 ; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
2179 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2180 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2182 ; GFX1010-SDAG-LABEL: test_writelane_i16:
2183 ; GFX1010-SDAG: ; %bb.0:
2184 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2185 ; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
2186 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2187 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2188 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2189 ; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2190 ; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
2191 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2193 ; GFX1100-SDAG-LABEL: test_writelane_i16:
2194 ; GFX1100-SDAG: ; %bb.0:
2195 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196 ; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
2197 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2198 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2199 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2200 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2201 ; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2202 ; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
2203 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2205 ; GFX802-GISEL-LABEL: test_writelane_i16:
2206 ; GFX802-GISEL: ; %bb.0:
2207 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208 ; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
2209 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2210 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2211 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2212 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2213 ; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2214 ; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
2215 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2216 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2218 ; GFX1010-GISEL-LABEL: test_writelane_i16:
2219 ; GFX1010-GISEL: ; %bb.0:
2220 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2221 ; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
2222 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2223 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2224 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2225 ; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2226 ; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
2227 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2229 ; GFX1100-GISEL-LABEL: test_writelane_i16:
2230 ; GFX1100-GISEL: ; %bb.0:
2231 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2232 ; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
2233 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2234 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2235 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2236 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2237 ; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2238 ; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2239 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2240 %oldval = load i16, ptr addrspace(1) %out
2241 %writelane = call i16 @llvm.amdgcn.writelane.i16(i16 %src, i32 %src1, i16 %oldval)
2242 store i16 %writelane, ptr addrspace(1) %out, align 4
; <2 x half> variant: loads the current <2 x half> at %out, passes %src, %src1
; and the loaded value to llvm.amdgcn.writelane.v2f16, and stores the result to
; %out.
; Every run line expects the packed pair to be handled as one 32-bit value: a
; dword load/store around a single v_writelane_b32 whose scalar operands come
; (directly or via m0) from v_readfirstlane_b32 of v2 and v3.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2246 define void @test_writelane_v2f16(ptr addrspace(1) %out, <2 x half> %src, i32 %src1) {
2247 ; GFX802-SDAG-LABEL: test_writelane_v2f16:
2248 ; GFX802-SDAG: ; %bb.0:
2249 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250 ; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1]
2251 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2252 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2253 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2254 ; GFX802-SDAG-NEXT: s_nop 1
2255 ; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2256 ; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4
2257 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2258 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2260 ; GFX1010-SDAG-LABEL: test_writelane_v2f16:
2261 ; GFX1010-SDAG: ; %bb.0:
2262 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263 ; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off
2264 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2265 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2266 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2267 ; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2268 ; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off
2269 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2271 ; GFX1100-SDAG-LABEL: test_writelane_v2f16:
2272 ; GFX1100-SDAG: ; %bb.0:
2273 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2274 ; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off
2275 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2276 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2277 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2278 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2279 ; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2280 ; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off
2281 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2283 ; GFX802-GISEL-LABEL: test_writelane_v2f16:
2284 ; GFX802-GISEL: ; %bb.0:
2285 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286 ; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1]
2287 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2288 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2289 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2290 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2291 ; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2292 ; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4
2293 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2296 ; GFX1010-GISEL-LABEL: test_writelane_v2f16:
2297 ; GFX1010-GISEL: ; %bb.0:
2298 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2299 ; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off
2300 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2301 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2302 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2303 ; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2304 ; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off
2305 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2307 ; GFX1100-GISEL-LABEL: test_writelane_v2f16:
2308 ; GFX1100-GISEL: ; %bb.0:
2309 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off
2311 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2312 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2313 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2314 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2315 ; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2316 ; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off
2317 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2318 %oldval = load <2 x half>, ptr addrspace(1) %out
2319 %writelane = call <2 x half> @llvm.amdgcn.writelane.v2f16(<2 x half> %src, i32 %src1, <2 x half> %oldval)
2320 store <2 x half> %writelane, ptr addrspace(1) %out, align 4
; <2 x float> variant: loads the current <2 x float> at %out, passes %src,
; %src1 and the loaded value to llvm.amdgcn.writelane.v2f32, and stores the
; result to %out.
; Every run line expects a 64-bit load/store around two v_writelane_b32
; instructions (one per 32-bit element, v5 and v6) that share the same lane
; select operand read from v4.
; NOTE(review): the function is named test_readlane_v2f32 but the body calls
; the writelane intrinsic, and the neighbouring tests in this file are all
; named test_writelane_*. This looks like a copy-paste leftover; renaming it
; requires regenerating every -LABEL line below, so it is only flagged here.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2324 define void @test_readlane_v2f32(ptr addrspace(1) %out, <2 x float> %src, i32 %src1) {
2325 ; GFX802-SDAG-LABEL: test_readlane_v2f32:
2326 ; GFX802-SDAG: ; %bb.0:
2327 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328 ; GFX802-SDAG-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
2329 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v4
2330 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2331 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2332 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2333 ; GFX802-SDAG-NEXT: s_nop 0
2334 ; GFX802-SDAG-NEXT: v_writelane_b32 v6, s4, m0
2335 ; GFX802-SDAG-NEXT: v_writelane_b32 v5, s5, m0
2336 ; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
2337 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2338 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2340 ; GFX1010-SDAG-LABEL: test_readlane_v2f32:
2341 ; GFX1010-SDAG: ; %bb.0:
2342 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2343 ; GFX1010-SDAG-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
2344 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2345 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v4
2346 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v2
2347 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2348 ; GFX1010-SDAG-NEXT: v_writelane_b32 v6, s4, s5
2349 ; GFX1010-SDAG-NEXT: v_writelane_b32 v5, s6, s5
2350 ; GFX1010-SDAG-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
2351 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2353 ; GFX1100-SDAG-LABEL: test_readlane_v2f32:
2354 ; GFX1100-SDAG: ; %bb.0:
2355 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356 ; GFX1100-SDAG-NEXT: global_load_b64 v[5:6], v[0:1], off
2357 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v3
2358 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v4
2359 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v2
2360 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2361 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2362 ; GFX1100-SDAG-NEXT: v_writelane_b32 v6, s0, s1
2363 ; GFX1100-SDAG-NEXT: v_writelane_b32 v5, s2, s1
2364 ; GFX1100-SDAG-NEXT: global_store_b64 v[0:1], v[5:6], off
2365 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2367 ; GFX802-GISEL-LABEL: test_readlane_v2f32:
2368 ; GFX802-GISEL: ; %bb.0:
2369 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370 ; GFX802-GISEL-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
2371 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v4
2372 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2373 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2374 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2375 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2376 ; GFX802-GISEL-NEXT: v_writelane_b32 v5, s4, m0
2377 ; GFX802-GISEL-NEXT: v_writelane_b32 v6, s6, m0
2378 ; GFX802-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
2379 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2380 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2382 ; GFX1010-GISEL-LABEL: test_readlane_v2f32:
2383 ; GFX1010-GISEL: ; %bb.0:
2384 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385 ; GFX1010-GISEL-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
2386 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2387 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v4
2388 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2389 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX1010-GISEL-NEXT: v_writelane_b32 v5, s4, s5
2391 ; GFX1010-GISEL-NEXT: v_writelane_b32 v6, s6, s5
2392 ; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
2393 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2395 ; GFX1100-GISEL-LABEL: test_readlane_v2f32:
2396 ; GFX1100-GISEL: ; %bb.0:
2397 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398 ; GFX1100-GISEL-NEXT: global_load_b64 v[5:6], v[0:1], off
2399 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2400 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v4
2401 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2402 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2403 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2404 ; GFX1100-GISEL-NEXT: v_writelane_b32 v5, s0, s1
2405 ; GFX1100-GISEL-NEXT: v_writelane_b32 v6, s2, s1
2406 ; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[5:6], off
2407 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2408 %oldval = load <2 x float>, ptr addrspace(1) %out
2409 %writelane = call <2 x float> @llvm.amdgcn.writelane.v2f32(<2 x float> %src, i32 %src1, <2 x float> %oldval)
2410 store <2 x float> %writelane, ptr addrspace(1) %out, align 4
; <7 x i32> variant: loads the current <7 x i32> at %out, passes %src, %src1
; and the loaded value to llvm.amdgcn.writelane.v7i32, and stores the result to
; %out.
; Every run line expects seven v_writelane_b32 instructions, one per 32-bit
; element (v10 through v16), all using the lane select read from v9.
; The memory access is split in two pieces at byte offset 16. The stores are
; always a 4-dword piece plus a 3-dword piece; the SelectionDAG runs load the
; same 4 + 3 dwords, while the GlobalISel runs expect the second load to be a
; full 4-dword load into v[14:17] even though only v[14:16] is written back.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2414 define void @test_writelane_v7i32(ptr addrspace(1) %out, <7 x i32> %src, i32 %src1) {
2415 ; GFX802-SDAG-LABEL: test_writelane_v7i32:
2416 ; GFX802-SDAG: ; %bb.0:
2417 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418 ; GFX802-SDAG-NEXT: v_add_u32_e32 v17, vcc, 16, v0
2419 ; GFX802-SDAG-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
2420 ; GFX802-SDAG-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
2421 ; GFX802-SDAG-NEXT: flat_load_dwordx3 v[14:16], v[17:18]
2422 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v9
2423 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v5
2424 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v4
2425 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v3
2426 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v2
2427 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v8
2428 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v7
2429 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v6
2430 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
2431 ; GFX802-SDAG-NEXT: v_writelane_b32 v13, s7, m0
2432 ; GFX802-SDAG-NEXT: v_writelane_b32 v12, s8, m0
2433 ; GFX802-SDAG-NEXT: v_writelane_b32 v11, s9, m0
2434 ; GFX802-SDAG-NEXT: v_writelane_b32 v10, s10, m0
2435 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2436 ; GFX802-SDAG-NEXT: v_writelane_b32 v16, s4, m0
2437 ; GFX802-SDAG-NEXT: v_writelane_b32 v15, s5, m0
2438 ; GFX802-SDAG-NEXT: v_writelane_b32 v14, s6, m0
2439 ; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
2440 ; GFX802-SDAG-NEXT: flat_store_dwordx3 v[17:18], v[14:16]
2441 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2442 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2444 ; GFX1010-SDAG-LABEL: test_writelane_v7i32:
2445 ; GFX1010-SDAG: ; %bb.0:
2446 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447 ; GFX1010-SDAG-NEXT: s_clause 0x1
2448 ; GFX1010-SDAG-NEXT: global_load_dwordx3 v[14:16], v[0:1], off offset:16
2449 ; GFX1010-SDAG-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
2450 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v9
2451 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v5
2452 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v4
2453 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v3
2454 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s11, v2
2455 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v8
2456 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v7
2457 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v6
2458 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
2459 ; GFX1010-SDAG-NEXT: v_writelane_b32 v16, s4, s5
2460 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2461 ; GFX1010-SDAG-NEXT: v_writelane_b32 v13, s8, s5
2462 ; GFX1010-SDAG-NEXT: v_writelane_b32 v12, s9, s5
2463 ; GFX1010-SDAG-NEXT: v_writelane_b32 v11, s10, s5
2464 ; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s11, s5
2465 ; GFX1010-SDAG-NEXT: v_writelane_b32 v15, s6, s5
2466 ; GFX1010-SDAG-NEXT: v_writelane_b32 v14, s7, s5
2467 ; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
2468 ; GFX1010-SDAG-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
2469 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2471 ; GFX1100-SDAG-LABEL: test_writelane_v7i32:
2472 ; GFX1100-SDAG: ; %bb.0:
2473 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474 ; GFX1100-SDAG-NEXT: s_clause 0x1
2475 ; GFX1100-SDAG-NEXT: global_load_b96 v[14:16], v[0:1], off offset:16
2476 ; GFX1100-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off
2477 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v9
2478 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2479 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v4
2480 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v3
2481 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s7, v2
2482 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v8
2483 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v7
2484 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v6
2485 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
2486 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3)
2487 ; GFX1100-SDAG-NEXT: v_writelane_b32 v16, s0, s1
2488 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2489 ; GFX1100-SDAG-NEXT: v_writelane_b32 v13, s4, s1
2490 ; GFX1100-SDAG-NEXT: v_writelane_b32 v12, s5, s1
2491 ; GFX1100-SDAG-NEXT: v_writelane_b32 v11, s6, s1
2492 ; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s7, s1
2493 ; GFX1100-SDAG-NEXT: v_writelane_b32 v15, s2, s1
2494 ; GFX1100-SDAG-NEXT: v_writelane_b32 v14, s3, s1
2495 ; GFX1100-SDAG-NEXT: s_clause 0x1
2496 ; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[10:13], off
2497 ; GFX1100-SDAG-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
2498 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2500 ; GFX802-GISEL-LABEL: test_writelane_v7i32:
2501 ; GFX802-GISEL: ; %bb.0:
2502 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2503 ; GFX802-GISEL-NEXT: v_add_u32_e32 v18, vcc, 16, v0
2504 ; GFX802-GISEL-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
2505 ; GFX802-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
2506 ; GFX802-GISEL-NEXT: flat_load_dwordx4 v[14:17], v[18:19]
2507 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v9
2508 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2509 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2510 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2511 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2512 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2513 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
2514 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
2515 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s11, v8
2516 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
2517 ; GFX802-GISEL-NEXT: v_writelane_b32 v10, s4, m0
2518 ; GFX802-GISEL-NEXT: v_writelane_b32 v11, s6, m0
2519 ; GFX802-GISEL-NEXT: v_writelane_b32 v12, s7, m0
2520 ; GFX802-GISEL-NEXT: v_writelane_b32 v13, s8, m0
2521 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2522 ; GFX802-GISEL-NEXT: v_writelane_b32 v14, s9, m0
2523 ; GFX802-GISEL-NEXT: v_writelane_b32 v15, s10, m0
2524 ; GFX802-GISEL-NEXT: v_writelane_b32 v16, s11, m0
2525 ; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
2526 ; GFX802-GISEL-NEXT: flat_store_dwordx3 v[18:19], v[14:16]
2527 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2528 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2530 ; GFX1010-GISEL-LABEL: test_writelane_v7i32:
2531 ; GFX1010-GISEL: ; %bb.0:
2532 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2533 ; GFX1010-GISEL-NEXT: s_clause 0x1
2534 ; GFX1010-GISEL-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
2535 ; GFX1010-GISEL-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:16
2536 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2537 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v9
2538 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2539 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2540 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2541 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
2542 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
2543 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s11, v8
2544 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
2545 ; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s4, s5
2546 ; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s6, s5
2547 ; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s7, s5
2548 ; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s8, s5
2549 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2550 ; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s9, s5
2551 ; GFX1010-GISEL-NEXT: v_writelane_b32 v15, s10, s5
2552 ; GFX1010-GISEL-NEXT: v_writelane_b32 v16, s11, s5
2553 ; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
2554 ; GFX1010-GISEL-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
2555 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2557 ; GFX1100-GISEL-LABEL: test_writelane_v7i32:
2558 ; GFX1100-GISEL: ; %bb.0:
2559 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2560 ; GFX1100-GISEL-NEXT: s_clause 0x1
2561 ; GFX1100-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
2562 ; GFX1100-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
2563 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2564 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v9
2565 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2566 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
2567 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
2568 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2569 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
2570 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s7, v8
2571 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
2572 ; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s0, s1
2573 ; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s2, s1
2574 ; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s3, s1
2575 ; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s4, s1
2576 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2577 ; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s5, s1
2578 ; GFX1100-GISEL-NEXT: v_writelane_b32 v15, s6, s1
2579 ; GFX1100-GISEL-NEXT: v_writelane_b32 v16, s7, s1
2580 ; GFX1100-GISEL-NEXT: s_clause 0x1
2581 ; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[10:13], off
2582 ; GFX1100-GISEL-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
2583 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2584 %oldval = load <7 x i32>, ptr addrspace(1) %out
2585 %writelane = call <7 x i32> @llvm.amdgcn.writelane.v7i32(<7 x i32> %src, i32 %src1, <7 x i32> %oldval)
2586 store <7 x i32> %writelane, ptr addrspace(1) %out, align 4
; <8 x i16> variant: loads the current <8 x i16> at %out, passes %src, %src1
; and the loaded value to llvm.amdgcn.writelane.v8i16, and stores the result to
; %out.
; Every run line expects the vector to be handled as four 32-bit registers
; (v7 through v10): one 4-dword load/store around four v_writelane_b32
; instructions that all use the lane select read from v6.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2590 define void @test_writelane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) {
2591 ; GFX802-SDAG-LABEL: test_writelane_v8i16:
2592 ; GFX802-SDAG: ; %bb.0:
2593 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594 ; GFX802-SDAG-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
2595 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v6
2596 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2597 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v4
2598 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v3
2599 ; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v2
2600 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2601 ; GFX802-SDAG-NEXT: v_writelane_b32 v10, s4, m0
2602 ; GFX802-SDAG-NEXT: v_writelane_b32 v9, s5, m0
2603 ; GFX802-SDAG-NEXT: v_writelane_b32 v8, s6, m0
2604 ; GFX802-SDAG-NEXT: v_writelane_b32 v7, s7, m0
2605 ; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2606 ; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2607 ; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2609 ; GFX1010-SDAG-LABEL: test_writelane_v8i16:
2610 ; GFX1010-SDAG: ; %bb.0:
2611 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2612 ; GFX1010-SDAG-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2613 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2614 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v6
2615 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v4
2616 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v3
2617 ; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v2
2618 ; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2619 ; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s4, s5
2620 ; GFX1010-SDAG-NEXT: v_writelane_b32 v9, s6, s5
2621 ; GFX1010-SDAG-NEXT: v_writelane_b32 v8, s7, s5
2622 ; GFX1010-SDAG-NEXT: v_writelane_b32 v7, s8, s5
2623 ; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2624 ; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2626 ; GFX1100-SDAG-LABEL: test_writelane_v8i16:
2627 ; GFX1100-SDAG: ; %bb.0:
2628 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629 ; GFX1100-SDAG-NEXT: global_load_b128 v[7:10], v[0:1], off
2630 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
2631 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v6
2632 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v4
2633 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v3
2634 ; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2635 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2636 ; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s0, s1
2637 ; GFX1100-SDAG-NEXT: v_writelane_b32 v9, s2, s1
2638 ; GFX1100-SDAG-NEXT: v_writelane_b32 v8, s3, s1
2639 ; GFX1100-SDAG-NEXT: v_writelane_b32 v7, s4, s1
2640 ; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[7:10], off
2641 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2643 ; GFX802-GISEL-LABEL: test_writelane_v8i16:
2644 ; GFX802-GISEL: ; %bb.0:
2645 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2646 ; GFX802-GISEL-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
2647 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2648 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2649 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2650 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2651 ; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2652 ; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2653 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2654 ; GFX802-GISEL-NEXT: v_writelane_b32 v7, s4, m0
2655 ; GFX802-GISEL-NEXT: v_writelane_b32 v8, s6, m0
2656 ; GFX802-GISEL-NEXT: v_writelane_b32 v9, s7, m0
2657 ; GFX802-GISEL-NEXT: v_writelane_b32 v10, s8, m0
2658 ; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2659 ; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2660 ; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2662 ; GFX1010-GISEL-LABEL: test_writelane_v8i16:
2663 ; GFX1010-GISEL: ; %bb.0:
2664 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2665 ; GFX1010-GISEL-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2666 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2667 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2668 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2669 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2670 ; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2671 ; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2672 ; GFX1010-GISEL-NEXT: v_writelane_b32 v7, s4, s5
2673 ; GFX1010-GISEL-NEXT: v_writelane_b32 v8, s6, s5
2674 ; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s7, s5
2675 ; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s8, s5
2676 ; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2677 ; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2679 ; GFX1100-GISEL-LABEL: test_writelane_v8i16:
2680 ; GFX1100-GISEL: ; %bb.0:
2681 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2682 ; GFX1100-GISEL-NEXT: global_load_b128 v[7:10], v[0:1], off
2683 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2684 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v6
2685 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2686 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
2687 ; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
2688 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2689 ; GFX1100-GISEL-NEXT: v_writelane_b32 v7, s0, s1
2690 ; GFX1100-GISEL-NEXT: v_writelane_b32 v8, s2, s1
2691 ; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s3, s1
2692 ; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s4, s1
2693 ; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[7:10], off
2694 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2695 %oldval = load <8 x i16>, ptr addrspace(1) %out
2696 %writelane = call <8 x i16> @llvm.amdgcn.writelane.v8i16(<8 x i16> %src, i32 %src1, <8 x i16> %oldval)
2697 store <8 x i16> %writelane, ptr addrspace(1) %out, align 4
; Declaration of the workitem-id intrinsic; it carries attribute group #2.
2701 declare i32 @llvm.amdgcn.workitem.id.x() #2
; Attribute groups referenced throughout this file:
;   #0 - attached to the llvm.amdgcn.writelane declarations at the top of the
;        file (nounwind, readnone, convergent).
;   #1 - attached to test function definitions such as the amdgpu_kernel
;        test_writelane_sreg_i32 (nounwind).
;   #2 - attached to the workitem.id.x declaration above (nounwind, readnone).
2703 attributes #0 = { nounwind readnone convergent }
2704 attributes #1 = { nounwind }
2705 attributes #2 = { nounwind readnone }