1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
4 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
5 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
6 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
7 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
8 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12
; Single-dword s.buffer.load with a constant offset of 4 and an inreg
; descriptor. Every run line expects a scalar s_buffer_load from s[0:3].
; The expected immediate is 0x1 under the GFX67 prefix and 0x4 under the
; other prefixes (presumably a dword-based versus byte-based offset
; encoding; confirm against the ISA documentation).
10 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
11 ; GFX67-LABEL: s_buffer_load_imm:
12 ; GFX67: ; %bb.0: ; %main_body
13 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1
14 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
15 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
16 ; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm
17 ; GFX67-NEXT: s_endpgm
19 ; GFX8910-LABEL: s_buffer_load_imm:
20 ; GFX8910: ; %bb.0: ; %main_body
21 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x4
22 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
23 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
24 ; GFX8910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
25 ; GFX8910-NEXT: s_endpgm
27 ; GFX11-LABEL: s_buffer_load_imm:
28 ; GFX11: ; %bb.0: ; %main_body
29 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
30 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
31 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
32 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
33 ; GFX11-NEXT: s_endpgm
35 ; GFX12-LABEL: s_buffer_load_imm:
36 ; GFX12: ; %bb.0: ; %main_body
37 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
38 ; GFX12-NEXT: s_wait_kmcnt 0x0
39 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
40 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
41 ; GFX12-NEXT: s_endpgm
; The loaded i32 is reinterpreted as float and exported in the first
; component; the remaining export operands are undef.
43 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
44 %bitcast = bitcast i32 %load to float
45 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Single-dword s.buffer.load whose offset is the inreg argument %index.
; All run lines expect a scalar s_buffer_load that takes s4 as the offset
; operand; the GFX910, GFX11 and GFX12 checks additionally print an explicit
; "offset:0x0" after the register.
50 define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
51 ; GFX678-LABEL: s_buffer_load_index:
52 ; GFX678: ; %bb.0: ; %main_body
53 ; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4
54 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
55 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
56 ; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
57 ; GFX678-NEXT: s_endpgm
59 ; GFX910-LABEL: s_buffer_load_index:
60 ; GFX910: ; %bb.0: ; %main_body
61 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
62 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
63 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
64 ; GFX910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
65 ; GFX910-NEXT: s_endpgm
67 ; GFX11-LABEL: s_buffer_load_index:
68 ; GFX11: ; %bb.0: ; %main_body
69 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
70 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
71 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
72 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
73 ; GFX11-NEXT: s_endpgm
75 ; GFX12-LABEL: s_buffer_load_index:
76 ; GFX12: ; %bb.0: ; %main_body
77 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
78 ; GFX12-NEXT: s_wait_kmcnt 0x0
79 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
80 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
81 ; GFX12-NEXT: s_endpgm
82 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
83 %bitcast = bitcast i32 %load to float
84 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Same intrinsic call as s_buffer_load_index, but %index is not inreg and,
; per the checks, arrives in v0. The expected code is a buffer_load with
; "offen" using v0 as the offset (not an s_buffer_load), and the loaded
; value is exported directly from v0 without an intermediate v_mov.
88 define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
89 ; GFX678910-LABEL: s_buffer_load_index_divergent:
90 ; GFX678910: ; %bb.0: ; %main_body
91 ; GFX678910-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
92 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
93 ; GFX678910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
94 ; GFX678910-NEXT: s_endpgm
96 ; GFX11-LABEL: s_buffer_load_index_divergent:
97 ; GFX11: ; %bb.0: ; %main_body
98 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
99 ; GFX11-NEXT: s_waitcnt vmcnt(0)
100 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
101 ; GFX11-NEXT: s_endpgm
103 ; GFX12-LABEL: s_buffer_load_index_divergent:
104 ; GFX12: ; %bb.0: ; %main_body
105 ; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
106 ; GFX12-NEXT: s_wait_loadcnt 0x0
107 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
108 ; GFX12-NEXT: s_endpgm
110 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
111 %bitcast = bitcast i32 %load to float
112 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Two-dword (<2 x i32>) s.buffer.load with a constant offset of 64.
; Expected immediate is 0x10 under the GFX67 prefix and 0x40 under the
; others (presumably dword versus byte units; confirm against the ISA
; documentation). Both loaded elements are exported as the first two
; components.
117 define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
118 ; GFX67-LABEL: s_buffer_loadx2_imm:
119 ; GFX67: ; %bb.0: ; %main_body
120 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
121 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
122 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
123 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
124 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
125 ; GFX67-NEXT: s_endpgm
127 ; GFX8910-LABEL: s_buffer_loadx2_imm:
128 ; GFX8910: ; %bb.0: ; %main_body
129 ; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
130 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
131 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
132 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
133 ; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
134 ; GFX8910-NEXT: s_endpgm
136 ; GFX11-LABEL: s_buffer_loadx2_imm:
137 ; GFX11: ; %bb.0: ; %main_body
138 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
139 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
140 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
141 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
142 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
143 ; GFX11-NEXT: s_endpgm
145 ; GFX12-LABEL: s_buffer_loadx2_imm:
146 ; GFX12: ; %bb.0: ; %main_body
147 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
148 ; GFX12-NEXT: s_wait_kmcnt 0x0
149 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
150 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
151 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
152 ; GFX12-NEXT: s_endpgm
153 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
154 %bitcast = bitcast <2 x i32> %load to <2 x float>
155 %x = extractelement <2 x float> %bitcast, i32 0
156 %y = extractelement <2 x float> %bitcast, i32 1
157 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Two-dword (<2 x i32>) s.buffer.load whose offset is the inreg argument
; %index. All run lines expect a scalar two-dword s_buffer_load with s4 as
; the offset operand; the GFX910, GFX11 and GFX12 checks additionally print
; "offset:0x0".
161 define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
162 ; GFX678-LABEL: s_buffer_loadx2_index:
163 ; GFX678: ; %bb.0: ; %main_body
164 ; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4
165 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
167 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
168 ; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
169 ; GFX678-NEXT: s_endpgm
171 ; GFX910-LABEL: s_buffer_loadx2_index:
172 ; GFX910: ; %bb.0: ; %main_body
173 ; GFX910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0
174 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
175 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
176 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
177 ; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
178 ; GFX910-NEXT: s_endpgm
180 ; GFX11-LABEL: s_buffer_loadx2_index:
181 ; GFX11: ; %bb.0: ; %main_body
182 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
183 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
184 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
185 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
186 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
187 ; GFX11-NEXT: s_endpgm
189 ; GFX12-LABEL: s_buffer_loadx2_index:
190 ; GFX12: ; %bb.0: ; %main_body
191 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
192 ; GFX12-NEXT: s_wait_kmcnt 0x0
193 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
194 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
195 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
196 ; GFX12-NEXT: s_endpgm
198 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
199 %bitcast = bitcast <2 x i32> %load to <2 x float>
200 %x = extractelement <2 x float> %bitcast, i32 0
201 %y = extractelement <2 x float> %bitcast, i32 1
202 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Two-dword s.buffer.load where %index is not inreg (it arrives in v0 per
; the checks). The expected code is a two-dword buffer_load with "offen"
; into v[0:1], exported directly without v_mov copies.
206 define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
207 ; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
208 ; GFX678910: ; %bb.0: ; %main_body
209 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
210 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
211 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
212 ; GFX678910-NEXT: s_endpgm
214 ; GFX11-LABEL: s_buffer_loadx2_index_divergent:
215 ; GFX11: ; %bb.0: ; %main_body
216 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
217 ; GFX11-NEXT: s_waitcnt vmcnt(0)
218 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
219 ; GFX11-NEXT: s_endpgm
221 ; GFX12-LABEL: s_buffer_loadx2_index_divergent:
222 ; GFX12: ; %bb.0: ; %main_body
223 ; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen
224 ; GFX12-NEXT: s_wait_loadcnt 0x0
225 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
226 ; GFX12-NEXT: s_endpgm
228 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
229 %bitcast = bitcast <2 x i32> %load to <2 x float>
230 %x = extractelement <2 x float> %bitcast, i32 0
231 %y = extractelement <2 x float> %bitcast, i32 1
232 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Three-dword (<3 x i32>) s.buffer.load with a constant offset of 64.
; The GFX67, GFX8910 and GFX11 checks expect a four-dword scalar load into
; s[0:3] of which only s0..s2 are copied out, while the GFX12 checks expect
; a three-dword s_buffer_load_b96 into s[0:2].
236 define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
237 ; GFX67-LABEL: s_buffer_loadx3_imm:
238 ; GFX67: ; %bb.0: ; %main_body
239 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
240 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
241 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
242 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
243 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
244 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm
245 ; GFX67-NEXT: s_endpgm
247 ; GFX8910-LABEL: s_buffer_loadx3_imm:
248 ; GFX8910: ; %bb.0: ; %main_body
249 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
250 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
252 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
253 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
254 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
255 ; GFX8910-NEXT: s_endpgm
257 ; GFX11-LABEL: s_buffer_loadx3_imm:
258 ; GFX11: ; %bb.0: ; %main_body
259 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x40
260 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
261 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
262 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
263 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
264 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
265 ; GFX11-NEXT: s_endpgm
267 ; GFX12-LABEL: s_buffer_loadx3_imm:
268 ; GFX12: ; %bb.0: ; %main_body
269 ; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], 0x40
270 ; GFX12-NEXT: s_wait_kmcnt 0x0
271 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
272 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
273 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
274 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
275 ; GFX12-NEXT: s_endpgm
277 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
278 %bitcast = bitcast <3 x i32> %load to <3 x float>
279 %x = extractelement <3 x float> %bitcast, i32 0
280 %y = extractelement <3 x float> %bitcast, i32 1
281 %z = extractelement <3 x float> %bitcast, i32 2
282 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; Three-dword (<3 x i32>) s.buffer.load whose offset is the inreg argument
; %index (s4 in the checks). As in the immediate variant, the GFX678,
; GFX910 and GFX11 checks expect a four-dword scalar load, and the GFX12
; checks expect s_buffer_load_b96.
286 define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
287 ; GFX678-LABEL: s_buffer_loadx3_index:
288 ; GFX678: ; %bb.0: ; %main_body
289 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
290 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
291 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
292 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
293 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
294 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm
295 ; GFX678-NEXT: s_endpgm
297 ; GFX910-LABEL: s_buffer_loadx3_index:
298 ; GFX910: ; %bb.0: ; %main_body
299 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
300 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
302 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
303 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
304 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
305 ; GFX910-NEXT: s_endpgm
307 ; GFX11-LABEL: s_buffer_loadx3_index:
308 ; GFX11: ; %bb.0: ; %main_body
309 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
310 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
311 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
312 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
313 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
314 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
315 ; GFX11-NEXT: s_endpgm
317 ; GFX12-LABEL: s_buffer_loadx3_index:
318 ; GFX12: ; %bb.0: ; %main_body
319 ; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], s4 offset:0x0
320 ; GFX12-NEXT: s_wait_kmcnt 0x0
321 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
322 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
323 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
324 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
325 ; GFX12-NEXT: s_endpgm
327 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
328 %bitcast = bitcast <3 x i32> %load to <3 x float>
329 %x = extractelement <3 x float> %bitcast, i32 0
330 %y = extractelement <3 x float> %bitcast, i32 1
331 %z = extractelement <3 x float> %bitcast, i32 2
332 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; Three-dword s.buffer.load where %index is not inreg (v0 in the checks).
; The tahiti run (GFX6 prefix) expects a four-dword buffer_load into v[0:3];
; every other run expects a three-dword buffer_load into v[0:2]. In all
; cases only v0..v2 carry loaded data into the export.
336 define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
337 ; GFX6-LABEL: s_buffer_loadx3_index_divergent:
338 ; GFX6: ; %bb.0: ; %main_body
339 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
340 ; GFX6-NEXT: s_waitcnt vmcnt(0)
341 ; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm
342 ; GFX6-NEXT: s_endpgm
344 ; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
345 ; GFX78910: ; %bb.0: ; %main_body
346 ; GFX78910-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
347 ; GFX78910-NEXT: s_waitcnt vmcnt(0)
348 ; GFX78910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
349 ; GFX78910-NEXT: s_endpgm
351 ; GFX11-LABEL: s_buffer_loadx3_index_divergent:
352 ; GFX11: ; %bb.0: ; %main_body
353 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
354 ; GFX11-NEXT: s_waitcnt vmcnt(0)
355 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
356 ; GFX11-NEXT: s_endpgm
358 ; GFX12-LABEL: s_buffer_loadx3_index_divergent:
359 ; GFX12: ; %bb.0: ; %main_body
360 ; GFX12-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], null offen
361 ; GFX12-NEXT: s_wait_loadcnt 0x0
362 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
363 ; GFX12-NEXT: s_endpgm
365 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
366 %bitcast = bitcast <3 x i32> %load to <3 x float>
367 %x = extractelement <3 x float> %bitcast, i32 0
368 %y = extractelement <3 x float> %bitcast, i32 1
369 %z = extractelement <3 x float> %bitcast, i32 2
370 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; Four-dword (<4 x i32>) s.buffer.load with a constant offset of 200.
; Expected immediate is 0x32 under the GFX67 prefix and 0xc8 under the
; others (presumably dword versus byte units; confirm against the ISA
; documentation). All four loaded elements are exported.
374 define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
375 ; GFX67-LABEL: s_buffer_loadx4_imm:
376 ; GFX67: ; %bb.0: ; %main_body
377 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
378 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
379 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
380 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
381 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
382 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
383 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
384 ; GFX67-NEXT: s_endpgm
386 ; GFX8910-LABEL: s_buffer_loadx4_imm:
387 ; GFX8910: ; %bb.0: ; %main_body
388 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
389 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
390 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
391 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
392 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
393 ; GFX8910-NEXT: v_mov_b32_e32 v3, s3
394 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
395 ; GFX8910-NEXT: s_endpgm
397 ; GFX11-LABEL: s_buffer_loadx4_imm:
398 ; GFX11: ; %bb.0: ; %main_body
399 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
400 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
401 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
402 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
403 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
404 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
405 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
406 ; GFX11-NEXT: s_endpgm
408 ; GFX12-LABEL: s_buffer_loadx4_imm:
409 ; GFX12: ; %bb.0: ; %main_body
410 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
411 ; GFX12-NEXT: s_wait_kmcnt 0x0
412 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
413 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
414 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
415 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
416 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
417 ; GFX12-NEXT: s_endpgm
419 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
420 %bitcast = bitcast <4 x i32> %load to <4 x float>
421 %x = extractelement <4 x float> %bitcast, i32 0
422 %y = extractelement <4 x float> %bitcast, i32 1
423 %z = extractelement <4 x float> %bitcast, i32 2
424 %w = extractelement <4 x float> %bitcast, i32 3
425 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Four-dword (<4 x i32>) s.buffer.load whose offset is the inreg argument
; %index. All run lines expect a scalar four-dword s_buffer_load with s4 as
; the offset operand; the GFX910, GFX11 and GFX12 checks additionally print
; "offset:0x0".
429 define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
430 ; GFX678-LABEL: s_buffer_loadx4_index:
431 ; GFX678: ; %bb.0: ; %main_body
432 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
433 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
434 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
435 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
436 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
437 ; GFX678-NEXT: v_mov_b32_e32 v3, s3
438 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
439 ; GFX678-NEXT: s_endpgm
441 ; GFX910-LABEL: s_buffer_loadx4_index:
442 ; GFX910: ; %bb.0: ; %main_body
443 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
444 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
445 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
446 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
447 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
448 ; GFX910-NEXT: v_mov_b32_e32 v3, s3
449 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
450 ; GFX910-NEXT: s_endpgm
452 ; GFX11-LABEL: s_buffer_loadx4_index:
453 ; GFX11: ; %bb.0: ; %main_body
454 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
455 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
456 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
457 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
458 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
459 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
460 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
461 ; GFX11-NEXT: s_endpgm
463 ; GFX12-LABEL: s_buffer_loadx4_index:
464 ; GFX12: ; %bb.0: ; %main_body
465 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
466 ; GFX12-NEXT: s_wait_kmcnt 0x0
467 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
468 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
469 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
470 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
471 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
472 ; GFX12-NEXT: s_endpgm
474 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
475 %bitcast = bitcast <4 x i32> %load to <4 x float>
476 %x = extractelement <4 x float> %bitcast, i32 0
477 %y = extractelement <4 x float> %bitcast, i32 1
478 %z = extractelement <4 x float> %bitcast, i32 2
479 %w = extractelement <4 x float> %bitcast, i32 3
480 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Four-dword s.buffer.load where %index is not inreg (v0 in the checks).
; The expected code is a four-dword buffer_load with "offen" into v[0:3],
; exported directly without v_mov copies.
484 define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
485 ; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
486 ; GFX678910: ; %bb.0: ; %main_body
487 ; GFX678910-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
488 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
489 ; GFX678910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
490 ; GFX678910-NEXT: s_endpgm
492 ; GFX11-LABEL: s_buffer_loadx4_index_divergent:
493 ; GFX11: ; %bb.0: ; %main_body
494 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
495 ; GFX11-NEXT: s_waitcnt vmcnt(0)
496 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
497 ; GFX11-NEXT: s_endpgm
499 ; GFX12-LABEL: s_buffer_loadx4_index_divergent:
500 ; GFX12: ; %bb.0: ; %main_body
501 ; GFX12-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], null offen
502 ; GFX12-NEXT: s_wait_loadcnt 0x0
503 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
504 ; GFX12-NEXT: s_endpgm
506 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
507 %bitcast = bitcast <4 x i32> %load to <4 x float>
508 %x = extractelement <4 x float> %bitcast, i32 0
509 %y = extractelement <4 x float> %bitcast, i32 1
510 %z = extractelement <4 x float> %bitcast, i32 2
511 %w = extractelement <4 x float> %bitcast, i32 3
512 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Two separate single-dword s.buffer.load calls on the same descriptor at
; constant offsets 4 and 8. Every run line expects them to be combined into
; one two-dword scalar load starting at the lower offset. The GFX910 checks
; differ from the GFX8 ones only in the destination registers (s[4:5]
; instead of s[0:1]).
516 define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
517 ; GFX67-LABEL: s_buffer_load_imm_mergex2:
518 ; GFX67: ; %bb.0: ; %main_body
519 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
520 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
521 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
522 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
523 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
524 ; GFX67-NEXT: s_endpgm
526 ; GFX8-LABEL: s_buffer_load_imm_mergex2:
527 ; GFX8: ; %bb.0: ; %main_body
528 ; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
529 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
530 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
531 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
532 ; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm
533 ; GFX8-NEXT: s_endpgm
535 ; GFX910-LABEL: s_buffer_load_imm_mergex2:
536 ; GFX910: ; %bb.0: ; %main_body
537 ; GFX910-NEXT: s_buffer_load_dwordx2 s[4:5], s[0:3], 0x4
538 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
539 ; GFX910-NEXT: v_mov_b32_e32 v0, s4
540 ; GFX910-NEXT: v_mov_b32_e32 v1, s5
541 ; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
542 ; GFX910-NEXT: s_endpgm
544 ; GFX11-LABEL: s_buffer_load_imm_mergex2:
545 ; GFX11: ; %bb.0: ; %main_body
546 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
547 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
548 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
549 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
550 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
551 ; GFX11-NEXT: s_endpgm
553 ; GFX12-LABEL: s_buffer_load_imm_mergex2:
554 ; GFX12: ; %bb.0: ; %main_body
555 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
556 ; GFX12-NEXT: s_wait_kmcnt 0x0
557 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
558 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
559 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
560 ; GFX12-NEXT: s_endpgm
; Adjacent dwords: offsets 4 and 8.
562 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
563 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
564 %x = bitcast i32 %load0 to float
565 %y = bitcast i32 %load1 to float
566 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Four separate single-dword s.buffer.load calls on the same descriptor at
; constant offsets 8, 12, 16 and 20. Every run line expects them to be
; combined into one four-dword scalar load starting at the lowest offset.
; The GFX910 checks differ from the GFX8 ones only in the destination
; registers (s[4:7] instead of s[0:3]).
570 define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
571 ; GFX67-LABEL: s_buffer_load_imm_mergex4:
572 ; GFX67: ; %bb.0: ; %main_body
573 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
574 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
575 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
576 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
577 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
578 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
579 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
580 ; GFX67-NEXT: s_endpgm
582 ; GFX8-LABEL: s_buffer_load_imm_mergex4:
583 ; GFX8: ; %bb.0: ; %main_body
584 ; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
585 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
586 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
587 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
588 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
589 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
590 ; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm
591 ; GFX8-NEXT: s_endpgm
593 ; GFX910-LABEL: s_buffer_load_imm_mergex4:
594 ; GFX910: ; %bb.0: ; %main_body
595 ; GFX910-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x8
596 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
597 ; GFX910-NEXT: v_mov_b32_e32 v0, s4
598 ; GFX910-NEXT: v_mov_b32_e32 v1, s5
599 ; GFX910-NEXT: v_mov_b32_e32 v2, s6
600 ; GFX910-NEXT: v_mov_b32_e32 v3, s7
601 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
602 ; GFX910-NEXT: s_endpgm
604 ; GFX11-LABEL: s_buffer_load_imm_mergex4:
605 ; GFX11: ; %bb.0: ; %main_body
606 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
607 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
608 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
609 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
610 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
611 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
612 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
613 ; GFX11-NEXT: s_endpgm
615 ; GFX12-LABEL: s_buffer_load_imm_mergex4:
616 ; GFX12: ; %bb.0: ; %main_body
617 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
618 ; GFX12-NEXT: s_wait_kmcnt 0x0
619 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
620 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
621 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
622 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
623 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
624 ; GFX12-NEXT: s_endpgm
; Four consecutive dwords: offsets 8, 12, 16 and 20.
626 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
627 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
628 %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
629 %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
630 %x = bitcast i32 %load0 to float
631 %y = bitcast i32 %load1 to float
632 %z = bitcast i32 %load2 to float
633 %w = bitcast i32 %load3 to float
634 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; External i32 global in addrspace(1). s_buffer_load_index_across_bb stores
; the shifted index to it, which gives %tmp a second use besides the load
; offset.
638 @gv = external addrspace(1) global i32
; The load offset is built in two steps: %tmp = %index << 4, which is also
; stored to @gv, and then %tmp1 = %tmp | 8 in block bb1. %index is not
; inreg, so the checks expect a buffer_load with "offen". Every run line
; expects the "or 8" to remain a separate v_or_b32 feeding the load, with no
; immediate offset folded into the buffer_load. The per-target differences
; in the checks are in how the address of @gv is materialised and how the
; store is emitted (buffer_store, flat_store or global_store).
640 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
641 ; GFX6-LABEL: s_buffer_load_index_across_bb:
642 ; GFX6: ; %bb.0: ; %main_body
643 ; GFX6-NEXT: s_getpc_b64 s[4:5]
644 ; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
645 ; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
646 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
647 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
648 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
649 ; GFX6-NEXT: s_mov_b32 s6, -1
650 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
651 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
652 ; GFX6-NEXT: s_waitcnt expcnt(0)
653 ; GFX6-NEXT: v_or_b32_e32 v0, 8, v0
654 ; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
655 ; GFX6-NEXT: s_waitcnt vmcnt(0)
656 ; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm
657 ; GFX6-NEXT: s_endpgm
659 ; GFX7-LABEL: s_buffer_load_index_across_bb:
660 ; GFX7: ; %bb.0: ; %main_body
661 ; GFX7-NEXT: s_getpc_b64 s[4:5]
662 ; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
663 ; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
664 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
665 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
666 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
667 ; GFX7-NEXT: s_mov_b32 s6, -1
668 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
669 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
670 ; GFX7-NEXT: v_or_b32_e32 v0, 8, v0
671 ; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
672 ; GFX7-NEXT: s_waitcnt vmcnt(0)
673 ; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm
674 ; GFX7-NEXT: s_endpgm
676 ; GFX8-LABEL: s_buffer_load_index_across_bb:
677 ; GFX8: ; %bb.0: ; %main_body
678 ; GFX8-NEXT: s_getpc_b64 s[4:5]
679 ; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
680 ; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
681 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
682 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
683 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
684 ; GFX8-NEXT: v_mov_b32_e32 v1, s4
685 ; GFX8-NEXT: v_mov_b32_e32 v2, s5
686 ; GFX8-NEXT: flat_store_dword v[1:2], v0
687 ; GFX8-NEXT: v_or_b32_e32 v0, 8, v0
688 ; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
689 ; GFX8-NEXT: s_waitcnt vmcnt(0)
690 ; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
691 ; GFX8-NEXT: s_endpgm
693 ; GFX9-LABEL: s_buffer_load_index_across_bb:
694 ; GFX9: ; %bb.0: ; %main_body
695 ; GFX9-NEXT: s_getpc_b64 s[4:5]
696 ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
697 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
698 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
699 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
700 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
701 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
702 ; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
703 ; GFX9-NEXT: v_or_b32_e32 v0, 8, v0
704 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
705 ; GFX9-NEXT: s_waitcnt vmcnt(0)
706 ; GFX9-NEXT: exp mrt0 v0, v0, v0, v0 done vm
707 ; GFX9-NEXT: s_endpgm
709 ; GFX10-LABEL: s_buffer_load_index_across_bb:
710 ; GFX10: ; %bb.0: ; %main_body
711 ; GFX10-NEXT: s_getpc_b64 s[4:5]
712 ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
713 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
714 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
715 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
716 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
717 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
718 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
719 ; GFX10-NEXT: v_or_b32_e32 v0, 8, v0
720 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
721 ; GFX10-NEXT: s_waitcnt vmcnt(0)
722 ; GFX10-NEXT: exp mrt0 v0, v0, v0, v0 done vm
723 ; GFX10-NEXT: s_endpgm
725 ; GFX11-LABEL: s_buffer_load_index_across_bb:
726 ; GFX11: ; %bb.0: ; %main_body
727 ; GFX11-NEXT: s_getpc_b64 s[4:5]
728 ; GFX11-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
729 ; GFX11-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
730 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
731 ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
732 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
733 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
734 ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
735 ; GFX11-NEXT: v_or_b32_e32 v0, 8, v0
736 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
737 ; GFX11-NEXT: s_waitcnt vmcnt(0)
738 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
739 ; GFX11-NEXT: s_endpgm
741 ; GFX12-LABEL: s_buffer_load_index_across_bb:
742 ; GFX12: ; %bb.0: ; %main_body
743 ; GFX12-NEXT: s_getpc_b64 s[4:5]
744 ; GFX12-NEXT: s_sext_i32_i16 s5, s5
745 ; GFX12-NEXT: s_add_co_u32 s4, s4, gv@gotpcrel32@lo+8
746 ; GFX12-NEXT: s_add_co_ci_u32 s5, s5, gv@gotpcrel32@hi+16
747 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
748 ; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
749 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
750 ; GFX12-NEXT: s_wait_kmcnt 0x0
751 ; GFX12-NEXT: global_store_b32 v1, v0, s[4:5]
752 ; GFX12-NEXT: v_or_b32_e32 v0, 8, v0
753 ; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
754 ; GFX12-NEXT: s_wait_loadcnt 0x0
755 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
756 ; GFX12-NEXT: s_endpgm
; The shifted index is stored to @gv, so it is used by both the store and
; the or in bb1.
758 %tmp = shl i32 %index, 4
759 store i32 %tmp, ptr addrspace(1) @gv
762 bb1: ; preds = %main_body
; The or with 8 that forms the load offset lives in a different block from
; the shl that produced %tmp.
763 %tmp1 = or i32 %tmp, 8
764 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
765 %bitcast = bitcast i32 %load to float
766 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Variant of s_buffer_load_index_across_bb without the store to @gv and with
; two single-dword loads in bb1, at offsets %tmp | 8 and (%tmp | 8) | 4,
; where %tmp = %index << 4. Every run line expects a single two-dword
; buffer_load with "offen offset:8": the shifted index stays in v0, the
; constant 8 appears as the instruction's immediate offset, and no v_or is
; emitted.
770 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
771 ; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
772 ; GFX678910: ; %bb.0: ; %main_body
773 ; GFX678910-NEXT: v_lshlrev_b32_e32 v0, 4, v0
774 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
775 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
776 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
777 ; GFX678910-NEXT: s_endpgm
779 ; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
780 ; GFX11: ; %bb.0: ; %main_body
781 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
782 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
783 ; GFX11-NEXT: s_waitcnt vmcnt(0)
784 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
785 ; GFX11-NEXT: s_endpgm
787 ; GFX12-LABEL: s_buffer_load_index_across_bb_merged:
788 ; GFX12: ; %bb.0: ; %main_body
789 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
790 ; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen offset:8
791 ; GFX12-NEXT: s_wait_loadcnt 0x0
792 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
793 ; GFX12-NEXT: s_endpgm
795 %tmp = shl i32 %index, 4
798 bb1: ; preds = %main_body
; The second offset is the first one with bit 2 also set, i.e. the next
; dword after %tmp | 8 given that %tmp has its low four bits clear.
799 %tmp1 = or i32 %tmp, 8
800 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
801 %tmp2 = or i32 %tmp1, 4
802 %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
803 %bitcast = bitcast i32 %load to float
804 %bitcast2 = bitcast i32 %load2 to float
805 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
; Constant offset -1. Every checked target is expected to move -1 into s4
; and use the SGPR-offset form of the load; no target uses an immediate.
809 define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
810 ; GFX6-LABEL: s_buffer_load_imm_neg1:
812 ; GFX6-NEXT: s_mov_b32 s4, -1
814 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
815 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
816 ; GFX6-NEXT: ; return to shader part epilog
818 ; GFX78-LABEL: s_buffer_load_imm_neg1:
820 ; GFX78-NEXT: s_mov_b32 s4, -1
821 ; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4
822 ; GFX78-NEXT: s_waitcnt lgkmcnt(0)
823 ; GFX78-NEXT: ; return to shader part epilog
825 ; GFX910-LABEL: s_buffer_load_imm_neg1:
827 ; GFX910-NEXT: s_mov_b32 s4, -1
828 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
829 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
830 ; GFX910-NEXT: ; return to shader part epilog
832 ; GFX11-LABEL: s_buffer_load_imm_neg1:
834 ; GFX11-NEXT: s_mov_b32 s4, -1
835 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
836 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
837 ; GFX11-NEXT: ; return to shader part epilog
839 ; GFX12-LABEL: s_buffer_load_imm_neg1:
841 ; GFX12-NEXT: s_mov_b32 s4, -1
842 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
843 ; GFX12-NEXT: s_wait_kmcnt 0x0
844 ; GFX12-NEXT: ; return to shader part epilog
845 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
; Constant offset -4 (0xfffffffc). GFX7 is expected to encode it as the
; immediate 0x3fffffff (0xfffffffc >> 2); all other checked targets are
; expected to move -4 into s4 and use the SGPR-offset form.
849 define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
850 ; GFX6-LABEL: s_buffer_load_imm_neg4:
852 ; GFX6-NEXT: s_mov_b32 s4, -4
854 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
855 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
856 ; GFX6-NEXT: ; return to shader part epilog
858 ; GFX7-LABEL: s_buffer_load_imm_neg4:
860 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff
861 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
862 ; GFX7-NEXT: ; return to shader part epilog
864 ; GFX8-LABEL: s_buffer_load_imm_neg4:
866 ; GFX8-NEXT: s_mov_b32 s4, -4
867 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
868 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
869 ; GFX8-NEXT: ; return to shader part epilog
871 ; GFX910-LABEL: s_buffer_load_imm_neg4:
873 ; GFX910-NEXT: s_mov_b32 s4, -4
874 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
875 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
876 ; GFX910-NEXT: ; return to shader part epilog
878 ; GFX11-LABEL: s_buffer_load_imm_neg4:
880 ; GFX11-NEXT: s_mov_b32 s4, -4
881 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
882 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
883 ; GFX11-NEXT: ; return to shader part epilog
885 ; GFX12-LABEL: s_buffer_load_imm_neg4:
887 ; GFX12-NEXT: s_mov_b32 s4, -4
888 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
889 ; GFX12-NEXT: s_wait_kmcnt 0x0
890 ; GFX12-NEXT: ; return to shader part epilog
891 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
; Constant offset -8 (0xfffffff8). GFX7 is expected to encode it as the
; immediate 0x3ffffffe (0xfffffff8 >> 2); all other checked targets are
; expected to move -8 into s4 and use the SGPR-offset form.
895 define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
896 ; GFX6-LABEL: s_buffer_load_imm_neg8:
898 ; GFX6-NEXT: s_mov_b32 s4, -8
900 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
901 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
902 ; GFX6-NEXT: ; return to shader part epilog
904 ; GFX7-LABEL: s_buffer_load_imm_neg8:
906 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe
907 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
908 ; GFX7-NEXT: ; return to shader part epilog
910 ; GFX8-LABEL: s_buffer_load_imm_neg8:
912 ; GFX8-NEXT: s_mov_b32 s4, -8
913 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
914 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
915 ; GFX8-NEXT: ; return to shader part epilog
917 ; GFX910-LABEL: s_buffer_load_imm_neg8:
919 ; GFX910-NEXT: s_mov_b32 s4, -8
920 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
921 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
922 ; GFX910-NEXT: ; return to shader part epilog
924 ; GFX11-LABEL: s_buffer_load_imm_neg8:
926 ; GFX11-NEXT: s_mov_b32 s4, -8
927 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
928 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
929 ; GFX11-NEXT: ; return to shader part epilog
931 ; GFX12-LABEL: s_buffer_load_imm_neg8:
933 ; GFX12-NEXT: s_mov_b32 s4, -8
934 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
935 ; GFX12-NEXT: s_wait_kmcnt 0x0
936 ; GFX12-NEXT: ; return to shader part epilog
937 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
; Constant offset with only bit 31 set (-2147483648 = 0x80000000). GFX7 is
; expected to use the immediate 0x20000000 (0x80000000 >> 2); all other
; checked targets are expected to build the value in s4 with s_brev_b32 of 1.
941 define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
942 ; GFX6-LABEL: s_buffer_load_imm_bit31:
944 ; GFX6-NEXT: s_brev_b32 s4, 1
946 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
947 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
948 ; GFX6-NEXT: ; return to shader part epilog
950 ; GFX7-LABEL: s_buffer_load_imm_bit31:
952 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000
953 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
954 ; GFX7-NEXT: ; return to shader part epilog
956 ; GFX8-LABEL: s_buffer_load_imm_bit31:
958 ; GFX8-NEXT: s_brev_b32 s4, 1
959 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
960 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
961 ; GFX8-NEXT: ; return to shader part epilog
963 ; GFX910-LABEL: s_buffer_load_imm_bit31:
965 ; GFX910-NEXT: s_brev_b32 s4, 1
966 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
967 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
968 ; GFX910-NEXT: ; return to shader part epilog
970 ; GFX11-LABEL: s_buffer_load_imm_bit31:
972 ; GFX11-NEXT: s_brev_b32 s4, 1
973 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
974 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
975 ; GFX11-NEXT: ; return to shader part epilog
977 ; GFX12-LABEL: s_buffer_load_imm_bit31:
979 ; GFX12-NEXT: s_brev_b32 s4, 1
980 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
981 ; GFX12-NEXT: s_wait_kmcnt 0x0
982 ; GFX12-NEXT: ; return to shader part epilog
983 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
; Constant offset with only bit 30 set (1073741824 = 0x40000000). GFX7 is
; expected to use the immediate 0x10000000 (0x40000000 >> 2); all other
; checked targets are expected to move the value into s4, printed as the
; inline constant 2.0 (0x40000000 is the bit pattern of 2.0f).
987 define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
988 ; GFX6-LABEL: s_buffer_load_imm_bit30:
990 ; GFX6-NEXT: s_mov_b32 s4, 2.0
992 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
993 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
994 ; GFX6-NEXT: ; return to shader part epilog
996 ; GFX7-LABEL: s_buffer_load_imm_bit30:
998 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000
999 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1000 ; GFX7-NEXT: ; return to shader part epilog
1002 ; GFX8-LABEL: s_buffer_load_imm_bit30:
1004 ; GFX8-NEXT: s_mov_b32 s4, 2.0
1005 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1006 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1007 ; GFX8-NEXT: ; return to shader part epilog
1009 ; GFX910-LABEL: s_buffer_load_imm_bit30:
1011 ; GFX910-NEXT: s_mov_b32 s4, 2.0
1012 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1013 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1014 ; GFX910-NEXT: ; return to shader part epilog
1016 ; GFX11-LABEL: s_buffer_load_imm_bit30:
1018 ; GFX11-NEXT: s_mov_b32 s4, 2.0
1019 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1020 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1021 ; GFX11-NEXT: ; return to shader part epilog
1023 ; GFX12-LABEL: s_buffer_load_imm_bit30:
1025 ; GFX12-NEXT: s_mov_b32 s4, 2.0
1026 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1027 ; GFX12-NEXT: s_wait_kmcnt 0x0
1028 ; GFX12-NEXT: ; return to shader part epilog
1029 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
; Constant offset with only bit 29 set (536870912 = 0x20000000). GFX7 is
; expected to use the immediate 0x8000000 (0x20000000 >> 2); all other
; checked targets are expected to build the value in s4 with s_brev_b32 of 4.
1033 define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
1034 ; GFX6-LABEL: s_buffer_load_imm_bit29:
1036 ; GFX6-NEXT: s_brev_b32 s4, 4
1037 ; GFX6-NEXT: s_nop 3
1038 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1039 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1040 ; GFX6-NEXT: ; return to shader part epilog
1042 ; GFX7-LABEL: s_buffer_load_imm_bit29:
1044 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000
1045 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1046 ; GFX7-NEXT: ; return to shader part epilog
1048 ; GFX8-LABEL: s_buffer_load_imm_bit29:
1050 ; GFX8-NEXT: s_brev_b32 s4, 4
1051 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1052 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1053 ; GFX8-NEXT: ; return to shader part epilog
1055 ; GFX910-LABEL: s_buffer_load_imm_bit29:
1057 ; GFX910-NEXT: s_brev_b32 s4, 4
1058 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1059 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1060 ; GFX910-NEXT: ; return to shader part epilog
1062 ; GFX11-LABEL: s_buffer_load_imm_bit29:
1064 ; GFX11-NEXT: s_brev_b32 s4, 4
1065 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1066 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1067 ; GFX11-NEXT: ; return to shader part epilog
1069 ; GFX12-LABEL: s_buffer_load_imm_bit29:
1071 ; GFX12-NEXT: s_brev_b32 s4, 4
1072 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1073 ; GFX12-NEXT: s_wait_kmcnt 0x0
1074 ; GFX12-NEXT: ; return to shader part epilog
1075 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
; Constant offset with only bit 21 set (2097152 = 0x200000). GFX7 is
; expected to use the immediate 0x80000 (0x200000 >> 2) and GFX12 the
; immediate 0x200000; the remaining checked targets are expected to move the
; value into s4 and use the SGPR-offset form.
1079 define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
1080 ; GFX6-LABEL: s_buffer_load_imm_bit21:
1082 ; GFX6-NEXT: s_mov_b32 s4, 0x200000
1083 ; GFX6-NEXT: s_nop 3
1084 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1085 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1086 ; GFX6-NEXT: ; return to shader part epilog
1088 ; GFX7-LABEL: s_buffer_load_imm_bit21:
1090 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
1091 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1092 ; GFX7-NEXT: ; return to shader part epilog
1094 ; GFX8-LABEL: s_buffer_load_imm_bit21:
1096 ; GFX8-NEXT: s_mov_b32 s4, 0x200000
1097 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1098 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1099 ; GFX8-NEXT: ; return to shader part epilog
1101 ; GFX910-LABEL: s_buffer_load_imm_bit21:
1103 ; GFX910-NEXT: s_mov_b32 s4, 0x200000
1104 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1105 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1106 ; GFX910-NEXT: ; return to shader part epilog
1108 ; GFX11-LABEL: s_buffer_load_imm_bit21:
1110 ; GFX11-NEXT: s_mov_b32 s4, 0x200000
1111 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1112 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1113 ; GFX11-NEXT: ; return to shader part epilog
1115 ; GFX12-LABEL: s_buffer_load_imm_bit21:
1117 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x200000
1118 ; GFX12-NEXT: s_wait_kmcnt 0x0
1119 ; GFX12-NEXT: ; return to shader part epilog
1120 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
; Constant offset with only bit 20 set (1048576 = 0x100000). GFX7 is
; expected to use the immediate 0x40000 (0x100000 >> 2) and GFX12 the
; immediate 0x100000; the remaining checked targets are expected to move the
; value into s4 and use the SGPR-offset form.
1124 define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
1125 ; GFX6-LABEL: s_buffer_load_imm_bit20:
1127 ; GFX6-NEXT: s_mov_b32 s4, 0x100000
1128 ; GFX6-NEXT: s_nop 3
1129 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1130 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1131 ; GFX6-NEXT: ; return to shader part epilog
1133 ; GFX7-LABEL: s_buffer_load_imm_bit20:
1135 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000
1136 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1137 ; GFX7-NEXT: ; return to shader part epilog
1139 ; GFX8-LABEL: s_buffer_load_imm_bit20:
1141 ; GFX8-NEXT: s_mov_b32 s4, 0x100000
1142 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1143 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1144 ; GFX8-NEXT: ; return to shader part epilog
1146 ; GFX910-LABEL: s_buffer_load_imm_bit20:
1148 ; GFX910-NEXT: s_mov_b32 s4, 0x100000
1149 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1150 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1151 ; GFX910-NEXT: ; return to shader part epilog
1153 ; GFX11-LABEL: s_buffer_load_imm_bit20:
1155 ; GFX11-NEXT: s_mov_b32 s4, 0x100000
1156 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1157 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1158 ; GFX11-NEXT: ; return to shader part epilog
1160 ; GFX12-LABEL: s_buffer_load_imm_bit20:
1162 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100000
1163 ; GFX12-NEXT: s_wait_kmcnt 0x0
1164 ; GFX12-NEXT: ; return to shader part epilog
1165 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
; Constant offset -1048576 (0xfff00000). GFX7 is expected to use the
; immediate 0x3ffc0000 (0xfff00000 >> 2); all other checked targets,
; including GFX12, are expected to move the value into s4 and use the
; SGPR-offset form.
1169 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
1170 ; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
1172 ; GFX6-NEXT: s_mov_b32 s4, 0xfff00000
1173 ; GFX6-NEXT: s_nop 3
1174 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1175 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1176 ; GFX6-NEXT: ; return to shader part epilog
1178 ; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
1180 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000
1181 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1182 ; GFX7-NEXT: ; return to shader part epilog
1184 ; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
1186 ; GFX8-NEXT: s_mov_b32 s4, 0xfff00000
1187 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1188 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1189 ; GFX8-NEXT: ; return to shader part epilog
1191 ; GFX910-LABEL: s_buffer_load_imm_neg_bit20:
1193 ; GFX910-NEXT: s_mov_b32 s4, 0xfff00000
1194 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1195 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1196 ; GFX910-NEXT: ; return to shader part epilog
1198 ; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
1200 ; GFX11-NEXT: s_mov_b32 s4, 0xfff00000
1201 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1202 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1203 ; GFX11-NEXT: ; return to shader part epilog
1205 ; GFX12-LABEL: s_buffer_load_imm_neg_bit20:
1207 ; GFX12-NEXT: s_mov_b32 s4, 0xfff00000
1208 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1209 ; GFX12-NEXT: s_wait_kmcnt 0x0
1210 ; GFX12-NEXT: ; return to shader part epilog
1211 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
; Constant offset with only bit 19 set (524288 = 0x80000). GFX6 is expected
; to move the value into s4; GFX7 is expected to use the immediate 0x20000
; (0x80000 >> 2); GFX8 and later checked targets are expected to use the
; immediate 0x80000.
1215 define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
1216 ; GFX6-LABEL: s_buffer_load_imm_bit19:
1218 ; GFX6-NEXT: s_mov_b32 s4, 0x80000
1219 ; GFX6-NEXT: s_nop 3
1220 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1221 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1222 ; GFX6-NEXT: ; return to shader part epilog
1224 ; GFX7-LABEL: s_buffer_load_imm_bit19:
1226 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000
1227 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1228 ; GFX7-NEXT: ; return to shader part epilog
1230 ; GFX8910-LABEL: s_buffer_load_imm_bit19:
1232 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
1233 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1234 ; GFX8910-NEXT: ; return to shader part epilog
1236 ; GFX11-LABEL: s_buffer_load_imm_bit19:
1238 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
1239 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1240 ; GFX11-NEXT: ; return to shader part epilog
1242 ; GFX12-LABEL: s_buffer_load_imm_bit19:
1244 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
1245 ; GFX12-NEXT: s_wait_kmcnt 0x0
1246 ; GFX12-NEXT: ; return to shader part epilog
1247 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
; Constant offset -524288 (0xfff80000). GFX7 is expected to use the
; immediate 0x3ffe0000 (0xfff80000 >> 2); all other checked targets are
; expected to move the value into s4 and use the SGPR-offset form.
1251 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
1252 ; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
1254 ; GFX6-NEXT: s_mov_b32 s4, 0xfff80000
1255 ; GFX6-NEXT: s_nop 3
1256 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1257 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1258 ; GFX6-NEXT: ; return to shader part epilog
1260 ; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
1262 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000
1263 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1264 ; GFX7-NEXT: ; return to shader part epilog
1266 ; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
1268 ; GFX8-NEXT: s_mov_b32 s4, 0xfff80000
1269 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1270 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1271 ; GFX8-NEXT: ; return to shader part epilog
1273 ; GFX910-LABEL: s_buffer_load_imm_neg_bit19:
1275 ; GFX910-NEXT: s_mov_b32 s4, 0xfff80000
1276 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1277 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1278 ; GFX910-NEXT: ; return to shader part epilog
1280 ; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
1282 ; GFX11-NEXT: s_mov_b32 s4, 0xfff80000
1283 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1284 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1285 ; GFX11-NEXT: ; return to shader part epilog
1287 ; GFX12-LABEL: s_buffer_load_imm_neg_bit19:
1289 ; GFX12-NEXT: s_mov_b32 s4, 0xfff80000
1290 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1291 ; GFX12-NEXT: s_wait_kmcnt 0x0
1292 ; GFX12-NEXT: ; return to shader part epilog
1293 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
; Constant offset 255 (0xff, not a multiple of 4). GFX6 and GFX7 are
; expected to move the value into s4 with s_movk_i32; GFX8 and later checked
; targets are expected to use the immediate 0xff.
1297 define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
1298 ; GFX6-LABEL: s_buffer_load_imm_255:
1300 ; GFX6-NEXT: s_movk_i32 s4, 0xff
1301 ; GFX6-NEXT: s_nop 3
1302 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1303 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1304 ; GFX6-NEXT: ; return to shader part epilog
1306 ; GFX7-LABEL: s_buffer_load_imm_255:
1308 ; GFX7-NEXT: s_movk_i32 s4, 0xff
1309 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1310 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1311 ; GFX7-NEXT: ; return to shader part epilog
1313 ; GFX8910-LABEL: s_buffer_load_imm_255:
1315 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1316 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1317 ; GFX8910-NEXT: ; return to shader part epilog
1319 ; GFX11-LABEL: s_buffer_load_imm_255:
1321 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
1322 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1323 ; GFX11-NEXT: ; return to shader part epilog
1325 ; GFX12-LABEL: s_buffer_load_imm_255:
1327 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
1328 ; GFX12-NEXT: s_wait_kmcnt 0x0
1329 ; GFX12-NEXT: ; return to shader part epilog
1330 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
; Constant offset 256 (0x100). GFX6 and GFX7 are expected to use the
; immediate 0x40 (256 / 4); GFX8 and later checked targets are expected to
; use the immediate 0x100.
1334 define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
1335 ; GFX67-LABEL: s_buffer_load_imm_256:
1337 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40
1338 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1339 ; GFX67-NEXT: ; return to shader part epilog
1341 ; GFX8910-LABEL: s_buffer_load_imm_256:
1343 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1344 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1345 ; GFX8910-NEXT: ; return to shader part epilog
1347 ; GFX11-LABEL: s_buffer_load_imm_256:
1349 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
1350 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1351 ; GFX11-NEXT: ; return to shader part epilog
1353 ; GFX12-LABEL: s_buffer_load_imm_256:
1355 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
1356 ; GFX12-NEXT: s_wait_kmcnt 0x0
1357 ; GFX12-NEXT: ; return to shader part epilog
1358 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
; Constant offset 1016 (0x3f8). GFX6 and GFX7 are expected to use the
; immediate 0xfe (1016 / 4); GFX8 and later checked targets are expected to
; use the immediate 0x3f8.
1362 define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
1363 ; GFX67-LABEL: s_buffer_load_imm_1016:
1365 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe
1366 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1367 ; GFX67-NEXT: ; return to shader part epilog
1369 ; GFX8910-LABEL: s_buffer_load_imm_1016:
1371 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8
1372 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1373 ; GFX8910-NEXT: ; return to shader part epilog
1375 ; GFX11-LABEL: s_buffer_load_imm_1016:
1377 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
1378 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1379 ; GFX11-NEXT: ; return to shader part epilog
1381 ; GFX12-LABEL: s_buffer_load_imm_1016:
1383 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
1384 ; GFX12-NEXT: s_wait_kmcnt 0x0
1385 ; GFX12-NEXT: ; return to shader part epilog
1386 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
; Constant offset 1020 (0x3fc). GFX6 and GFX7 are expected to use the
; immediate 0xff (1020 / 4); GFX8 and later checked targets are expected to
; use the immediate 0x3fc.
1390 define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
1391 ; GFX67-LABEL: s_buffer_load_imm_1020:
1393 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1394 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1395 ; GFX67-NEXT: ; return to shader part epilog
1397 ; GFX8910-LABEL: s_buffer_load_imm_1020:
1399 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc
1400 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1401 ; GFX8910-NEXT: ; return to shader part epilog
1403 ; GFX11-LABEL: s_buffer_load_imm_1020:
1405 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
1406 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1407 ; GFX11-NEXT: ; return to shader part epilog
1409 ; GFX12-LABEL: s_buffer_load_imm_1020:
1411 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
1412 ; GFX12-NEXT: s_wait_kmcnt 0x0
1413 ; GFX12-NEXT: ; return to shader part epilog
1414 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
; Constant offset 1021 (0x3fd, not a multiple of 4). GFX6 and GFX7 are
; expected to move the value into s4 with s_movk_i32; GFX8 and later checked
; targets are expected to use the immediate 0x3fd.
1418 define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
1419 ; GFX6-LABEL: s_buffer_load_imm_1021:
1421 ; GFX6-NEXT: s_movk_i32 s4, 0x3fd
1422 ; GFX6-NEXT: s_nop 3
1423 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1424 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1425 ; GFX6-NEXT: ; return to shader part epilog
1427 ; GFX7-LABEL: s_buffer_load_imm_1021:
1429 ; GFX7-NEXT: s_movk_i32 s4, 0x3fd
1430 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1431 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1432 ; GFX7-NEXT: ; return to shader part epilog
1434 ; GFX8910-LABEL: s_buffer_load_imm_1021:
1436 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd
1437 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1438 ; GFX8910-NEXT: ; return to shader part epilog
1440 ; GFX11-LABEL: s_buffer_load_imm_1021:
1442 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
1443 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1444 ; GFX11-NEXT: ; return to shader part epilog
1446 ; GFX12-LABEL: s_buffer_load_imm_1021:
1448 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
1449 ; GFX12-NEXT: s_wait_kmcnt 0x0
1450 ; GFX12-NEXT: ; return to shader part epilog
1451 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
; Constant offset 1024 (0x400). GFX6 is expected to move the value into s4
; with s_movk_i32; GFX7 is expected to use the immediate 0x100 (1024 / 4);
; GFX8 and later checked targets are expected to use the immediate 0x400.
1455 define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
1456 ; GFX6-LABEL: s_buffer_load_imm_1024:
1458 ; GFX6-NEXT: s_movk_i32 s4, 0x400
1459 ; GFX6-NEXT: s_nop 3
1460 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1461 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1462 ; GFX6-NEXT: ; return to shader part epilog
1464 ; GFX7-LABEL: s_buffer_load_imm_1024:
1466 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1467 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1468 ; GFX7-NEXT: ; return to shader part epilog
1470 ; GFX8910-LABEL: s_buffer_load_imm_1024:
1472 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x400
1473 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1474 ; GFX8910-NEXT: ; return to shader part epilog
1476 ; GFX11-LABEL: s_buffer_load_imm_1024:
1478 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
1479 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1480 ; GFX11-NEXT: ; return to shader part epilog
1482 ; GFX12-LABEL: s_buffer_load_imm_1024:
1484 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
1485 ; GFX12-NEXT: s_wait_kmcnt 0x0
1486 ; GFX12-NEXT: ; return to shader part epilog
1487 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
; Constant offset 1025 (0x401, not a multiple of 4). GFX6 and GFX7 are
; expected to move the value into s4 with s_movk_i32; GFX8 and later checked
; targets are expected to use the immediate 0x401.
1491 define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
1492 ; GFX6-LABEL: s_buffer_load_imm_1025:
1494 ; GFX6-NEXT: s_movk_i32 s4, 0x401
1495 ; GFX6-NEXT: s_nop 3
1496 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1497 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1498 ; GFX6-NEXT: ; return to shader part epilog
1500 ; GFX7-LABEL: s_buffer_load_imm_1025:
1502 ; GFX7-NEXT: s_movk_i32 s4, 0x401
1503 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1504 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1505 ; GFX7-NEXT: ; return to shader part epilog
1507 ; GFX8910-LABEL: s_buffer_load_imm_1025:
1509 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x401
1510 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1511 ; GFX8910-NEXT: ; return to shader part epilog
1513 ; GFX11-LABEL: s_buffer_load_imm_1025:
1515 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
1516 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1517 ; GFX11-NEXT: ; return to shader part epilog
1519 ; GFX12-LABEL: s_buffer_load_imm_1025:
1521 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
1522 ; GFX12-NEXT: s_wait_kmcnt 0x0
1523 ; GFX12-NEXT: ; return to shader part epilog
1524 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
; Constant offset 1028 (0x404), a multiple of 4 just above 1024. This test
; previously passed 1024 and so duplicated s_buffer_load_imm_1024. Expected
; code: GFX6 moves the value into s4 with s_movk_i32; GFX7 uses the immediate
; 0x101 (1028 / 4); GFX8 and later checked targets use the immediate 0x404.
; NOTE(review): the check lines below were derived from the pattern of the
; 1024 test; regenerate with utils/update_llc_test_checks.py to confirm.
1528 define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
1529 ; GFX6-LABEL: s_buffer_load_imm_1028:
1531 ; GFX6-NEXT: s_movk_i32 s4, 0x404
1532 ; GFX6-NEXT: s_nop 3
1533 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1534 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1535 ; GFX6-NEXT: ; return to shader part epilog
1537 ; GFX7-LABEL: s_buffer_load_imm_1028:
1539 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x101
1540 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1541 ; GFX7-NEXT: ; return to shader part epilog
1543 ; GFX8910-LABEL: s_buffer_load_imm_1028:
1545 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x404
1546 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1547 ; GFX8910-NEXT: ; return to shader part epilog
1549 ; GFX11-LABEL: s_buffer_load_imm_1028:
1551 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
1552 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1553 ; GFX11-NEXT: ; return to shader part epilog
1555 ; GFX12-LABEL: s_buffer_load_imm_1028:
1557 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
1558 ; GFX12-NEXT: s_wait_kmcnt 0x0
1559 ; GFX12-NEXT: ; return to shader part epilog
1560 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
; Declarations of the intrinsics called by the tests in this file: the export
; intrinsic and the scalar buffer load in its i32 and vector return variants.
; NOTE(review): the v2i32/v3i32/v4i32 variants are presumably used by tests
; earlier in the file - confirm before removing any of them.
1564 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
1565 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
1566 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
1567 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
1568 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
1570 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: