1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
4 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
5 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
6 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
7 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
8 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12
; Scalar buffer load of a single i32 at the constant offset 4 from an inreg
; descriptor; the value is bitcast to float and exported.
; The assertions below expect the immediate to be printed as 0x1 (offset / 4)
; for tahiti and hawaii, and as 0x4 for tonga and every later target.
10 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
11 ; GFX67-LABEL: s_buffer_load_imm:
12 ; GFX67: ; %bb.0: ; %main_body
13 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1
14 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
15 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
16 ; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm
17 ; GFX67-NEXT: s_endpgm
19 ; GFX8910-LABEL: s_buffer_load_imm:
20 ; GFX8910: ; %bb.0: ; %main_body
21 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x4
22 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
23 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
24 ; GFX8910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
25 ; GFX8910-NEXT: s_endpgm
27 ; GFX11-LABEL: s_buffer_load_imm:
28 ; GFX11: ; %bb.0: ; %main_body
29 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
30 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
31 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
32 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
33 ; GFX11-NEXT: s_endpgm
35 ; GFX12-LABEL: s_buffer_load_imm:
36 ; GFX12: ; %bb.0: ; %main_body
37 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
38 ; GFX12-NEXT: s_wait_kmcnt 0x0
39 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
40 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
41 ; GFX12-NEXT: s_endpgm
43 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
44 %bitcast = bitcast i32 %load to float
45 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Scalar buffer load of a single i32 whose offset is the inreg argument %index.
; The assertions expect the argument to be used directly as the s4 operand of
; the scalar load; from gfx900 onwards the expected text also carries an
; explicit "offset:0x0".
49 define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
50 ; GFX678-LABEL: s_buffer_load_index:
51 ; GFX678: ; %bb.0: ; %main_body
52 ; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4
53 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
54 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
55 ; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
56 ; GFX678-NEXT: s_endpgm
58 ; GFX910-LABEL: s_buffer_load_index:
59 ; GFX910: ; %bb.0: ; %main_body
60 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
61 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
62 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
63 ; GFX910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
64 ; GFX910-NEXT: s_endpgm
66 ; GFX11-LABEL: s_buffer_load_index:
67 ; GFX11: ; %bb.0: ; %main_body
68 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
69 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
70 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
71 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
72 ; GFX11-NEXT: s_endpgm
74 ; GFX12-LABEL: s_buffer_load_index:
75 ; GFX12: ; %bb.0: ; %main_body
76 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
77 ; GFX12-NEXT: s_wait_kmcnt 0x0
78 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
79 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
80 ; GFX12-NEXT: s_endpgm
82 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
83 %bitcast = bitcast i32 %load to float
84 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Single i32 load where %index is NOT marked inreg. The assertions expect a
; vector buffer_load with "offen" (offset taken from v0) on every target rather
; than a scalar s_buffer_load.
88 define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
89 ; GFX678910-LABEL: s_buffer_load_index_divergent:
90 ; GFX678910: ; %bb.0: ; %main_body
91 ; GFX678910-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
92 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
93 ; GFX678910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
94 ; GFX678910-NEXT: s_endpgm
96 ; GFX11-LABEL: s_buffer_load_index_divergent:
97 ; GFX11: ; %bb.0: ; %main_body
98 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
99 ; GFX11-NEXT: s_waitcnt vmcnt(0)
100 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
101 ; GFX11-NEXT: s_endpgm
103 ; GFX12-LABEL: s_buffer_load_index_divergent:
104 ; GFX12: ; %bb.0: ; %main_body
105 ; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
106 ; GFX12-NEXT: s_wait_loadcnt 0x0
107 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
108 ; GFX12-NEXT: s_endpgm
110 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
111 %bitcast = bitcast i32 %load to float
112 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Scalar buffer load of <2 x i32> at the constant offset 64; both elements are
; exported as floats.
; The assertions expect a two-dword scalar load whose immediate is printed as
; 0x10 (offset / 4) for tahiti and hawaii, and as 0x40 for later targets.
116 define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
117 ; GFX67-LABEL: s_buffer_loadx2_imm:
118 ; GFX67: ; %bb.0: ; %main_body
119 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
120 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
122 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
123 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
124 ; GFX67-NEXT: s_endpgm
126 ; GFX8910-LABEL: s_buffer_loadx2_imm:
127 ; GFX8910: ; %bb.0: ; %main_body
128 ; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
129 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
130 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
131 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
132 ; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
133 ; GFX8910-NEXT: s_endpgm
135 ; GFX11-LABEL: s_buffer_loadx2_imm:
136 ; GFX11: ; %bb.0: ; %main_body
137 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
138 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
139 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
140 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
141 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
142 ; GFX11-NEXT: s_endpgm
144 ; GFX12-LABEL: s_buffer_loadx2_imm:
145 ; GFX12: ; %bb.0: ; %main_body
146 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
147 ; GFX12-NEXT: s_wait_kmcnt 0x0
148 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
149 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
150 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
151 ; GFX12-NEXT: s_endpgm
153 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
154 %bitcast = bitcast <2 x i32> %load to <2 x float>
155 %x = extractelement <2 x float> %bitcast, i32 0
156 %y = extractelement <2 x float> %bitcast, i32 1
157 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Scalar buffer load of <2 x i32> whose offset is the inreg argument %index.
; The assertions expect a two-dword scalar load with s4 as the offset operand;
; from gfx900 onwards the expected text also carries "offset:0x0".
161 define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
162 ; GFX678-LABEL: s_buffer_loadx2_index:
163 ; GFX678: ; %bb.0: ; %main_body
164 ; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4
165 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
167 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
168 ; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
169 ; GFX678-NEXT: s_endpgm
171 ; GFX910-LABEL: s_buffer_loadx2_index:
172 ; GFX910: ; %bb.0: ; %main_body
173 ; GFX910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0
174 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
175 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
176 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
177 ; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
178 ; GFX910-NEXT: s_endpgm
180 ; GFX11-LABEL: s_buffer_loadx2_index:
181 ; GFX11: ; %bb.0: ; %main_body
182 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
183 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
184 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
185 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
186 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
187 ; GFX11-NEXT: s_endpgm
189 ; GFX12-LABEL: s_buffer_loadx2_index:
190 ; GFX12: ; %bb.0: ; %main_body
191 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
192 ; GFX12-NEXT: s_wait_kmcnt 0x0
193 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
194 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
195 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
196 ; GFX12-NEXT: s_endpgm
198 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
199 %bitcast = bitcast <2 x i32> %load to <2 x float>
200 %x = extractelement <2 x float> %bitcast, i32 0
201 %y = extractelement <2 x float> %bitcast, i32 1
202 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; <2 x i32> load where %index is NOT marked inreg. The assertions expect a
; two-dword vector buffer_load with "offen" (offset taken from v0) on every
; target rather than a scalar s_buffer_load.
206 define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
207 ; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
208 ; GFX678910: ; %bb.0: ; %main_body
209 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
210 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
211 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
212 ; GFX678910-NEXT: s_endpgm
214 ; GFX11-LABEL: s_buffer_loadx2_index_divergent:
215 ; GFX11: ; %bb.0: ; %main_body
216 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
217 ; GFX11-NEXT: s_waitcnt vmcnt(0)
218 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
219 ; GFX11-NEXT: s_endpgm
221 ; GFX12-LABEL: s_buffer_loadx2_index_divergent:
222 ; GFX12: ; %bb.0: ; %main_body
223 ; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen
224 ; GFX12-NEXT: s_wait_loadcnt 0x0
225 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
226 ; GFX12-NEXT: s_endpgm
228 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
229 %bitcast = bitcast <2 x i32> %load to <2 x float>
230 %x = extractelement <2 x float> %bitcast, i32 0
231 %y = extractelement <2 x float> %bitcast, i32 1
232 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Scalar buffer load of <3 x i32> at the constant offset 64; the three elements
; are exported as floats.
; The assertions expect every target up to gfx1100 to issue a four-dword load
; (dwordx4 / b128) and read only s0..s2, while gfx1200 is expected to use
; s_buffer_load_b96. The immediate is 0x10 (offset / 4) for tahiti and hawaii
; and 0x40 elsewhere.
236 define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
237 ; GFX67-LABEL: s_buffer_loadx3_imm:
238 ; GFX67: ; %bb.0: ; %main_body
239 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
240 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
241 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
242 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
243 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
244 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm
245 ; GFX67-NEXT: s_endpgm
247 ; GFX8910-LABEL: s_buffer_loadx3_imm:
248 ; GFX8910: ; %bb.0: ; %main_body
249 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
250 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
252 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
253 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
254 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
255 ; GFX8910-NEXT: s_endpgm
257 ; GFX11-LABEL: s_buffer_loadx3_imm:
258 ; GFX11: ; %bb.0: ; %main_body
259 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x40
260 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
261 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
262 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
263 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
264 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
265 ; GFX11-NEXT: s_endpgm
267 ; GFX12-LABEL: s_buffer_loadx3_imm:
268 ; GFX12: ; %bb.0: ; %main_body
269 ; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], 0x40
270 ; GFX12-NEXT: s_wait_kmcnt 0x0
271 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
272 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
273 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
274 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
275 ; GFX12-NEXT: s_endpgm
277 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
278 %bitcast = bitcast <3 x i32> %load to <3 x float>
279 %x = extractelement <3 x float> %bitcast, i32 0
280 %y = extractelement <3 x float> %bitcast, i32 1
281 %z = extractelement <3 x float> %bitcast, i32 2
282 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; Scalar buffer load of <3 x i32> whose offset is the inreg argument %index.
; The assertions expect a four-dword scalar load (dwordx4 / b128) with s4 as
; the offset operand on every target up to gfx1100, and s_buffer_load_b96 on
; gfx1200; from gfx900 onwards the expected text also carries "offset:0x0".
286 define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
287 ; GFX678-LABEL: s_buffer_loadx3_index:
288 ; GFX678: ; %bb.0: ; %main_body
289 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
290 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
291 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
292 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
293 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
294 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm
295 ; GFX678-NEXT: s_endpgm
297 ; GFX910-LABEL: s_buffer_loadx3_index:
298 ; GFX910: ; %bb.0: ; %main_body
299 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
300 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
302 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
303 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
304 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
305 ; GFX910-NEXT: s_endpgm
307 ; GFX11-LABEL: s_buffer_loadx3_index:
308 ; GFX11: ; %bb.0: ; %main_body
309 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
310 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
311 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
312 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
313 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
314 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
315 ; GFX11-NEXT: s_endpgm
317 ; GFX12-LABEL: s_buffer_loadx3_index:
318 ; GFX12: ; %bb.0: ; %main_body
319 ; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], s4 offset:0x0
320 ; GFX12-NEXT: s_wait_kmcnt 0x0
321 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
322 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
323 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
324 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
325 ; GFX12-NEXT: s_endpgm
327 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
328 %bitcast = bitcast <3 x i32> %load to <3 x float>
329 %x = extractelement <3 x float> %bitcast, i32 0
330 %y = extractelement <3 x float> %bitcast, i32 1
331 %z = extractelement <3 x float> %bitcast, i32 2
332 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; <3 x i32> load where %index is NOT marked inreg, so the assertions expect a
; vector buffer_load with "offen" (offset taken from v0).
; tahiti is expected to issue buffer_load_dwordx4 and use v0..v2 only; every
; later target is expected to issue a three-dword load (dwordx3 / b96).
336 define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
337 ; GFX6-LABEL: s_buffer_loadx3_index_divergent:
338 ; GFX6: ; %bb.0: ; %main_body
339 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
340 ; GFX6-NEXT: s_waitcnt vmcnt(0)
341 ; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm
342 ; GFX6-NEXT: s_endpgm
344 ; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
345 ; GFX78910: ; %bb.0: ; %main_body
346 ; GFX78910-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
347 ; GFX78910-NEXT: s_waitcnt vmcnt(0)
348 ; GFX78910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
349 ; GFX78910-NEXT: s_endpgm
351 ; GFX11-LABEL: s_buffer_loadx3_index_divergent:
352 ; GFX11: ; %bb.0: ; %main_body
353 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
354 ; GFX11-NEXT: s_waitcnt vmcnt(0)
355 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
356 ; GFX11-NEXT: s_endpgm
358 ; GFX12-LABEL: s_buffer_loadx3_index_divergent:
359 ; GFX12: ; %bb.0: ; %main_body
360 ; GFX12-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], null offen
361 ; GFX12-NEXT: s_wait_loadcnt 0x0
362 ; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done
363 ; GFX12-NEXT: s_endpgm
365 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
366 %bitcast = bitcast <3 x i32> %load to <3 x float>
367 %x = extractelement <3 x float> %bitcast, i32 0
368 %y = extractelement <3 x float> %bitcast, i32 1
369 %z = extractelement <3 x float> %bitcast, i32 2
370 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; Scalar buffer load of <4 x i32> at the constant offset 200; all four elements
; are exported as floats.
; The assertions expect a four-dword scalar load whose immediate is printed as
; 0x32 (200 / 4) for tahiti and hawaii, and as 0xc8 (200) for later targets.
374 define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
375 ; GFX67-LABEL: s_buffer_loadx4_imm:
376 ; GFX67: ; %bb.0: ; %main_body
377 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
378 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
379 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
380 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
381 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
382 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
383 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
384 ; GFX67-NEXT: s_endpgm
386 ; GFX8910-LABEL: s_buffer_loadx4_imm:
387 ; GFX8910: ; %bb.0: ; %main_body
388 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
389 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
390 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
391 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
392 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
393 ; GFX8910-NEXT: v_mov_b32_e32 v3, s3
394 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
395 ; GFX8910-NEXT: s_endpgm
397 ; GFX11-LABEL: s_buffer_loadx4_imm:
398 ; GFX11: ; %bb.0: ; %main_body
399 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
400 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
401 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
402 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
403 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
404 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
405 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
406 ; GFX11-NEXT: s_endpgm
408 ; GFX12-LABEL: s_buffer_loadx4_imm:
409 ; GFX12: ; %bb.0: ; %main_body
410 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
411 ; GFX12-NEXT: s_wait_kmcnt 0x0
412 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
413 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
414 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
415 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
416 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
417 ; GFX12-NEXT: s_endpgm
419 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
420 %bitcast = bitcast <4 x i32> %load to <4 x float>
421 %x = extractelement <4 x float> %bitcast, i32 0
422 %y = extractelement <4 x float> %bitcast, i32 1
423 %z = extractelement <4 x float> %bitcast, i32 2
424 %w = extractelement <4 x float> %bitcast, i32 3
425 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Scalar buffer load of <4 x i32> whose offset is the inreg argument %index.
; The assertions expect a four-dword scalar load with s4 as the offset operand;
; from gfx900 onwards the expected text also carries "offset:0x0".
429 define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
430 ; GFX678-LABEL: s_buffer_loadx4_index:
431 ; GFX678: ; %bb.0: ; %main_body
432 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
433 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
434 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
435 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
436 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
437 ; GFX678-NEXT: v_mov_b32_e32 v3, s3
438 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
439 ; GFX678-NEXT: s_endpgm
441 ; GFX910-LABEL: s_buffer_loadx4_index:
442 ; GFX910: ; %bb.0: ; %main_body
443 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
444 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
445 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
446 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
447 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
448 ; GFX910-NEXT: v_mov_b32_e32 v3, s3
449 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
450 ; GFX910-NEXT: s_endpgm
452 ; GFX11-LABEL: s_buffer_loadx4_index:
453 ; GFX11: ; %bb.0: ; %main_body
454 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
455 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
456 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
457 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
458 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
459 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
460 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
461 ; GFX11-NEXT: s_endpgm
463 ; GFX12-LABEL: s_buffer_loadx4_index:
464 ; GFX12: ; %bb.0: ; %main_body
465 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
466 ; GFX12-NEXT: s_wait_kmcnt 0x0
467 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
468 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
469 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
470 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
471 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
472 ; GFX12-NEXT: s_endpgm
474 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
475 %bitcast = bitcast <4 x i32> %load to <4 x float>
476 %x = extractelement <4 x float> %bitcast, i32 0
477 %y = extractelement <4 x float> %bitcast, i32 1
478 %z = extractelement <4 x float> %bitcast, i32 2
479 %w = extractelement <4 x float> %bitcast, i32 3
480 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; <4 x i32> load where %index is NOT marked inreg. The assertions expect a
; four-dword vector buffer_load with "offen" (offset taken from v0) on every
; target rather than a scalar s_buffer_load.
484 define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
485 ; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
486 ; GFX678910: ; %bb.0: ; %main_body
487 ; GFX678910-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
488 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
489 ; GFX678910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
490 ; GFX678910-NEXT: s_endpgm
492 ; GFX11-LABEL: s_buffer_loadx4_index_divergent:
493 ; GFX11: ; %bb.0: ; %main_body
494 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
495 ; GFX11-NEXT: s_waitcnt vmcnt(0)
496 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
497 ; GFX11-NEXT: s_endpgm
499 ; GFX12-LABEL: s_buffer_loadx4_index_divergent:
500 ; GFX12: ; %bb.0: ; %main_body
501 ; GFX12-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], null offen
502 ; GFX12-NEXT: s_wait_loadcnt 0x0
503 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
504 ; GFX12-NEXT: s_endpgm
506 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
507 %bitcast = bitcast <4 x i32> %load to <4 x float>
508 %x = extractelement <4 x float> %bitcast, i32 0
509 %y = extractelement <4 x float> %bitcast, i32 1
510 %z = extractelement <4 x float> %bitcast, i32 2
511 %w = extractelement <4 x float> %bitcast, i32 3
512 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Two separate i32 loads from the same descriptor at the adjacent constant
; offsets 4 and 8. The assertions expect them to be combined into ONE two-dword
; scalar load (dwordx2 / b64) starting at the first offset, printed as 0x1 for
; tahiti and hawaii and as 0x4 for later targets.
516 define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
517 ; GFX67-LABEL: s_buffer_load_imm_mergex2:
518 ; GFX67: ; %bb.0: ; %main_body
519 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
520 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
521 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
522 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
523 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
524 ; GFX67-NEXT: s_endpgm
526 ; GFX8910-LABEL: s_buffer_load_imm_mergex2:
527 ; GFX8910: ; %bb.0: ; %main_body
528 ; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
529 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
530 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
531 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
532 ; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
533 ; GFX8910-NEXT: s_endpgm
535 ; GFX11-LABEL: s_buffer_load_imm_mergex2:
536 ; GFX11: ; %bb.0: ; %main_body
537 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
538 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
539 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
540 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
541 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
542 ; GFX11-NEXT: s_endpgm
544 ; GFX12-LABEL: s_buffer_load_imm_mergex2:
545 ; GFX12: ; %bb.0: ; %main_body
546 ; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
547 ; GFX12-NEXT: s_wait_kmcnt 0x0
548 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
549 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
550 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
551 ; GFX12-NEXT: s_endpgm
553 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
554 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
555 %x = bitcast i32 %load0 to float
556 %y = bitcast i32 %load1 to float
557 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Four separate i32 loads from the same descriptor at the adjacent constant
; offsets 8, 12, 16 and 20. The assertions expect them to be combined into ONE
; four-dword scalar load (dwordx4 / b128) starting at the first offset, printed
; as 0x2 for tahiti and hawaii and as 0x8 for later targets.
561 define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
562 ; GFX67-LABEL: s_buffer_load_imm_mergex4:
563 ; GFX67: ; %bb.0: ; %main_body
564 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
565 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
566 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
567 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
568 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
569 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
570 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
571 ; GFX67-NEXT: s_endpgm
573 ; GFX8910-LABEL: s_buffer_load_imm_mergex4:
574 ; GFX8910: ; %bb.0: ; %main_body
575 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
576 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
577 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
578 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
579 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
580 ; GFX8910-NEXT: v_mov_b32_e32 v3, s3
581 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
582 ; GFX8910-NEXT: s_endpgm
584 ; GFX11-LABEL: s_buffer_load_imm_mergex4:
585 ; GFX11: ; %bb.0: ; %main_body
586 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
587 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
588 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
589 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
590 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
591 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
592 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
593 ; GFX11-NEXT: s_endpgm
595 ; GFX12-LABEL: s_buffer_load_imm_mergex4:
596 ; GFX12: ; %bb.0: ; %main_body
597 ; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
598 ; GFX12-NEXT: s_wait_kmcnt 0x0
599 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
600 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
601 ; GFX12-NEXT: v_mov_b32_e32 v2, s2
602 ; GFX12-NEXT: v_mov_b32_e32 v3, s3
603 ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
604 ; GFX12-NEXT: s_endpgm
606 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
607 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
608 %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
609 %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
610 %x = bitcast i32 %load0 to float
611 %y = bitcast i32 %load1 to float
612 %z = bitcast i32 %load2 to float
613 %w = bitcast i32 %load3 to float
614 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; External i32 global in addrspace(1); s_buffer_load_index_across_bb stores the
; shifted index to it.
618 @gv = external addrspace(1) global i32
; The load offset is built from a non-inreg %index in two steps: %tmp is
; %index shifted left by 4 and is also stored to @gv; %tmp1 = %tmp | 8 is
; computed after the bb1 label and used as the offset of a single i32 load.
; The assertions expect the OR to remain as a separate v_or_b32 and the vector
; buffer_load to use "offen" with no immediate offset.
620 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
621 ; GFX6-LABEL: s_buffer_load_index_across_bb:
622 ; GFX6: ; %bb.0: ; %main_body
623 ; GFX6-NEXT: s_getpc_b64 s[4:5]
624 ; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
625 ; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
626 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
627 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
628 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
629 ; GFX6-NEXT: s_mov_b32 s6, -1
630 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
631 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
632 ; GFX6-NEXT: s_waitcnt expcnt(0)
633 ; GFX6-NEXT: v_or_b32_e32 v0, 8, v0
634 ; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
635 ; GFX6-NEXT: s_waitcnt vmcnt(0)
636 ; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm
637 ; GFX6-NEXT: s_endpgm
639 ; GFX7-LABEL: s_buffer_load_index_across_bb:
640 ; GFX7: ; %bb.0: ; %main_body
641 ; GFX7-NEXT: s_getpc_b64 s[4:5]
642 ; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
643 ; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
644 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
645 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
646 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
647 ; GFX7-NEXT: s_mov_b32 s6, -1
648 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
649 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
650 ; GFX7-NEXT: v_or_b32_e32 v0, 8, v0
651 ; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
652 ; GFX7-NEXT: s_waitcnt vmcnt(0)
653 ; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm
654 ; GFX7-NEXT: s_endpgm
656 ; GFX8-LABEL: s_buffer_load_index_across_bb:
657 ; GFX8: ; %bb.0: ; %main_body
658 ; GFX8-NEXT: s_getpc_b64 s[4:5]
659 ; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
660 ; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
661 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
662 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
663 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
664 ; GFX8-NEXT: v_mov_b32_e32 v1, s4
665 ; GFX8-NEXT: v_mov_b32_e32 v2, s5
666 ; GFX8-NEXT: flat_store_dword v[1:2], v0
667 ; GFX8-NEXT: v_or_b32_e32 v0, 8, v0
668 ; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
669 ; GFX8-NEXT: s_waitcnt vmcnt(0)
670 ; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
671 ; GFX8-NEXT: s_endpgm
673 ; GFX9-LABEL: s_buffer_load_index_across_bb:
674 ; GFX9: ; %bb.0: ; %main_body
675 ; GFX9-NEXT: s_getpc_b64 s[4:5]
676 ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
677 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
678 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
679 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
680 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
681 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
682 ; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
683 ; GFX9-NEXT: v_or_b32_e32 v0, 8, v0
684 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
685 ; GFX9-NEXT: s_waitcnt vmcnt(0)
686 ; GFX9-NEXT: exp mrt0 v0, v0, v0, v0 done vm
687 ; GFX9-NEXT: s_endpgm
689 ; GFX10-LABEL: s_buffer_load_index_across_bb:
690 ; GFX10: ; %bb.0: ; %main_body
691 ; GFX10-NEXT: s_getpc_b64 s[4:5]
692 ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
693 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
694 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
695 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
696 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
697 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
698 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
699 ; GFX10-NEXT: v_or_b32_e32 v0, 8, v0
700 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
701 ; GFX10-NEXT: s_waitcnt vmcnt(0)
702 ; GFX10-NEXT: exp mrt0 v0, v0, v0, v0 done vm
703 ; GFX10-NEXT: s_endpgm
705 ; GFX11-LABEL: s_buffer_load_index_across_bb:
706 ; GFX11: ; %bb.0: ; %main_body
707 ; GFX11-NEXT: s_getpc_b64 s[4:5]
708 ; GFX11-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
709 ; GFX11-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
710 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
711 ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
712 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
713 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
714 ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
715 ; GFX11-NEXT: v_or_b32_e32 v0, 8, v0
716 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
717 ; GFX11-NEXT: s_waitcnt vmcnt(0)
718 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
719 ; GFX11-NEXT: s_nop 0
720 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
721 ; GFX11-NEXT: s_endpgm
723 ; GFX12-LABEL: s_buffer_load_index_across_bb:
724 ; GFX12: ; %bb.0: ; %main_body
725 ; GFX12-NEXT: s_getpc_b64 s[4:5]
726 ; GFX12-NEXT: s_sext_i32_i16 s5, s5
727 ; GFX12-NEXT: s_add_co_u32 s4, s4, gv@gotpcrel32@lo+8
728 ; GFX12-NEXT: s_add_co_ci_u32 s5, s5, gv@gotpcrel32@hi+16
729 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
730 ; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
731 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
732 ; GFX12-NEXT: s_wait_kmcnt 0x0
733 ; GFX12-NEXT: global_store_b32 v1, v0, s[4:5]
734 ; GFX12-NEXT: v_or_b32_e32 v0, 8, v0
735 ; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
736 ; GFX12-NEXT: s_wait_loadcnt 0x0
737 ; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done
738 ; GFX12-NEXT: s_nop 0
739 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
740 ; GFX12-NEXT: s_endpgm
742 %tmp = shl i32 %index, 4
743 store i32 %tmp, ptr addrspace(1) @gv
746 bb1: ; preds = %main_body
747 %tmp1 = or i32 %tmp, 8
748 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
749 %bitcast = bitcast i32 %load to float
750 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Two i32 loads whose offsets are derived from a non-inreg %index: the first at
; (%index << 4) | 8 and the second at that value | 4. Both loads sit after the
; bb1 label while the shift is computed before it.
; The assertions expect ONE two-dword vector buffer_load with "offen" and the
; constant 8 folded into "offset:8", leaving only the shift as a separate
; instruction.
754 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
755 ; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
756 ; GFX678910: ; %bb.0: ; %main_body
757 ; GFX678910-NEXT: v_lshlrev_b32_e32 v0, 4, v0
758 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
759 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
760 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
761 ; GFX678910-NEXT: s_endpgm
763 ; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
764 ; GFX11: ; %bb.0: ; %main_body
765 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
766 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
767 ; GFX11-NEXT: s_waitcnt vmcnt(0)
768 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
769 ; GFX11-NEXT: s_endpgm
771 ; GFX12-LABEL: s_buffer_load_index_across_bb_merged:
772 ; GFX12: ; %bb.0: ; %main_body
773 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
774 ; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen offset:8
775 ; GFX12-NEXT: s_wait_loadcnt 0x0
776 ; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done
777 ; GFX12-NEXT: s_endpgm
779 %tmp = shl i32 %index, 4
782 bb1: ; preds = %main_body
783 %tmp1 = or i32 %tmp, 8
784 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
785 %tmp2 = or i32 %tmp1, 4
786 %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
787 %bitcast = bitcast i32 %load to float
788 %bitcast2 = bitcast i32 %load2 to float
789 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
; Constant offset -1. No check prefix encodes the constant in the load itself:
; every target first moves -1 into s4 and uses s4 as the offset operand. GFX9
; and later additionally print an explicit "offset:0x0".
793 define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
794 ; GFX6-LABEL: s_buffer_load_imm_neg1:
796 ; GFX6-NEXT: s_mov_b32 s4, -1
798 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
799 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
800 ; GFX6-NEXT: ; return to shader part epilog
802 ; GFX78-LABEL: s_buffer_load_imm_neg1:
804 ; GFX78-NEXT: s_mov_b32 s4, -1
805 ; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4
806 ; GFX78-NEXT: s_waitcnt lgkmcnt(0)
807 ; GFX78-NEXT: ; return to shader part epilog
809 ; GFX910-LABEL: s_buffer_load_imm_neg1:
811 ; GFX910-NEXT: s_mov_b32 s4, -1
812 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
813 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
814 ; GFX910-NEXT: ; return to shader part epilog
816 ; GFX11-LABEL: s_buffer_load_imm_neg1:
818 ; GFX11-NEXT: s_mov_b32 s4, -1
819 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
820 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
821 ; GFX11-NEXT: ; return to shader part epilog
823 ; GFX12-LABEL: s_buffer_load_imm_neg1:
825 ; GFX12-NEXT: s_mov_b32 s4, -1
826 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
827 ; GFX12-NEXT: s_wait_kmcnt 0x0
828 ; GFX12-NEXT: ; return to shader part epilog
829 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
; Constant offset -4. Only GFX7 encodes the constant in the load, as 0x3fffffff
; (equal to 0xfffffffc >> 2; NOTE(review): presumably a dword-granular literal
; offset - confirm). All other targets move -4 into s4 and use it as the offset.
833 define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
834 ; GFX6-LABEL: s_buffer_load_imm_neg4:
836 ; GFX6-NEXT: s_mov_b32 s4, -4
838 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
839 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
840 ; GFX6-NEXT: ; return to shader part epilog
842 ; GFX7-LABEL: s_buffer_load_imm_neg4:
844 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff
845 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
846 ; GFX7-NEXT: ; return to shader part epilog
848 ; GFX8-LABEL: s_buffer_load_imm_neg4:
850 ; GFX8-NEXT: s_mov_b32 s4, -4
851 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
852 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
853 ; GFX8-NEXT: ; return to shader part epilog
855 ; GFX910-LABEL: s_buffer_load_imm_neg4:
857 ; GFX910-NEXT: s_mov_b32 s4, -4
858 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
859 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
860 ; GFX910-NEXT: ; return to shader part epilog
862 ; GFX11-LABEL: s_buffer_load_imm_neg4:
864 ; GFX11-NEXT: s_mov_b32 s4, -4
865 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
866 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
867 ; GFX11-NEXT: ; return to shader part epilog
869 ; GFX12-LABEL: s_buffer_load_imm_neg4:
871 ; GFX12-NEXT: s_mov_b32 s4, -4
872 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
873 ; GFX12-NEXT: s_wait_kmcnt 0x0
874 ; GFX12-NEXT: ; return to shader part epilog
875 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
; Constant offset -8. Only GFX7 encodes the constant in the load, as 0x3ffffffe
; (equal to 0xfffffff8 >> 2). All other targets move -8 into s4 and use it as
; the offset operand.
879 define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
880 ; GFX6-LABEL: s_buffer_load_imm_neg8:
882 ; GFX6-NEXT: s_mov_b32 s4, -8
884 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
885 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
886 ; GFX6-NEXT: ; return to shader part epilog
888 ; GFX7-LABEL: s_buffer_load_imm_neg8:
890 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe
891 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
892 ; GFX7-NEXT: ; return to shader part epilog
894 ; GFX8-LABEL: s_buffer_load_imm_neg8:
896 ; GFX8-NEXT: s_mov_b32 s4, -8
897 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
898 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
899 ; GFX8-NEXT: ; return to shader part epilog
901 ; GFX910-LABEL: s_buffer_load_imm_neg8:
903 ; GFX910-NEXT: s_mov_b32 s4, -8
904 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
905 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
906 ; GFX910-NEXT: ; return to shader part epilog
908 ; GFX11-LABEL: s_buffer_load_imm_neg8:
910 ; GFX11-NEXT: s_mov_b32 s4, -8
911 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
912 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
913 ; GFX11-NEXT: ; return to shader part epilog
915 ; GFX12-LABEL: s_buffer_load_imm_neg8:
917 ; GFX12-NEXT: s_mov_b32 s4, -8
918 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
919 ; GFX12-NEXT: s_wait_kmcnt 0x0
920 ; GFX12-NEXT: ; return to shader part epilog
921 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
; Constant offset with only bit 31 set (-2147483648 = 0x80000000). GFX7 encodes
; it in the load as 0x20000000 (0x80000000 >> 2). All other targets build the
; value in s4 with "s_brev_b32 s4, 1" and use s4 as the offset operand.
925 define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
926 ; GFX6-LABEL: s_buffer_load_imm_bit31:
928 ; GFX6-NEXT: s_brev_b32 s4, 1
930 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
931 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
932 ; GFX6-NEXT: ; return to shader part epilog
934 ; GFX7-LABEL: s_buffer_load_imm_bit31:
936 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000
937 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
938 ; GFX7-NEXT: ; return to shader part epilog
940 ; GFX8-LABEL: s_buffer_load_imm_bit31:
942 ; GFX8-NEXT: s_brev_b32 s4, 1
943 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
944 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
945 ; GFX8-NEXT: ; return to shader part epilog
947 ; GFX910-LABEL: s_buffer_load_imm_bit31:
949 ; GFX910-NEXT: s_brev_b32 s4, 1
950 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
951 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
952 ; GFX910-NEXT: ; return to shader part epilog
954 ; GFX11-LABEL: s_buffer_load_imm_bit31:
956 ; GFX11-NEXT: s_brev_b32 s4, 1
957 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
958 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
959 ; GFX11-NEXT: ; return to shader part epilog
961 ; GFX12-LABEL: s_buffer_load_imm_bit31:
963 ; GFX12-NEXT: s_brev_b32 s4, 1
964 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
965 ; GFX12-NEXT: s_wait_kmcnt 0x0
966 ; GFX12-NEXT: ; return to shader part epilog
967 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
; Constant offset with only bit 30 set (1073741824 = 0x40000000). GFX7 encodes
; it in the load as 0x10000000 (0x40000000 >> 2). All other targets move the
; value into s4, printed as the constant 2.0 (whose IEEE-754 single-precision
; bit pattern is 0x40000000), and use s4 as the offset operand.
971 define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
972 ; GFX6-LABEL: s_buffer_load_imm_bit30:
974 ; GFX6-NEXT: s_mov_b32 s4, 2.0
976 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
977 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
978 ; GFX6-NEXT: ; return to shader part epilog
980 ; GFX7-LABEL: s_buffer_load_imm_bit30:
982 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000
983 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
984 ; GFX7-NEXT: ; return to shader part epilog
986 ; GFX8-LABEL: s_buffer_load_imm_bit30:
988 ; GFX8-NEXT: s_mov_b32 s4, 2.0
989 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
990 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
991 ; GFX8-NEXT: ; return to shader part epilog
993 ; GFX910-LABEL: s_buffer_load_imm_bit30:
995 ; GFX910-NEXT: s_mov_b32 s4, 2.0
996 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
997 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
998 ; GFX910-NEXT: ; return to shader part epilog
1000 ; GFX11-LABEL: s_buffer_load_imm_bit30:
1002 ; GFX11-NEXT: s_mov_b32 s4, 2.0
1003 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1004 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1005 ; GFX11-NEXT: ; return to shader part epilog
1007 ; GFX12-LABEL: s_buffer_load_imm_bit30:
1009 ; GFX12-NEXT: s_mov_b32 s4, 2.0
1010 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1011 ; GFX12-NEXT: s_wait_kmcnt 0x0
1012 ; GFX12-NEXT: ; return to shader part epilog
1013 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
; Constant offset with only bit 29 set (536870912 = 0x20000000). GFX7 encodes
; it in the load as 0x8000000 (0x20000000 >> 2). All other targets build the
; value in s4 with "s_brev_b32 s4, 4" and use s4 as the offset operand; the GFX6
; checks also expect an "s_nop 3" between the write of s4 and the load.
1017 define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
1018 ; GFX6-LABEL: s_buffer_load_imm_bit29:
1020 ; GFX6-NEXT: s_brev_b32 s4, 4
1021 ; GFX6-NEXT: s_nop 3
1022 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1023 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1024 ; GFX6-NEXT: ; return to shader part epilog
1026 ; GFX7-LABEL: s_buffer_load_imm_bit29:
1028 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000
1029 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1030 ; GFX7-NEXT: ; return to shader part epilog
1032 ; GFX8-LABEL: s_buffer_load_imm_bit29:
1034 ; GFX8-NEXT: s_brev_b32 s4, 4
1035 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1036 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1037 ; GFX8-NEXT: ; return to shader part epilog
1039 ; GFX910-LABEL: s_buffer_load_imm_bit29:
1041 ; GFX910-NEXT: s_brev_b32 s4, 4
1042 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1043 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1044 ; GFX910-NEXT: ; return to shader part epilog
1046 ; GFX11-LABEL: s_buffer_load_imm_bit29:
1048 ; GFX11-NEXT: s_brev_b32 s4, 4
1049 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1050 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1051 ; GFX11-NEXT: ; return to shader part epilog
1053 ; GFX12-LABEL: s_buffer_load_imm_bit29:
1055 ; GFX12-NEXT: s_brev_b32 s4, 4
1056 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1057 ; GFX12-NEXT: s_wait_kmcnt 0x0
1058 ; GFX12-NEXT: ; return to shader part epilog
1059 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
; Constant offset with only bit 21 set (2097152 = 0x200000). GFX7 encodes it in
; the load as 0x80000 (0x200000 >> 2) and GFX12 encodes 0x200000 directly.
; GFX6, GFX8, GFX9/GFX10 and GFX11 move the value into s4 and use s4 as the
; offset operand.
1063 define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
1064 ; GFX6-LABEL: s_buffer_load_imm_bit21:
1066 ; GFX6-NEXT: s_mov_b32 s4, 0x200000
1067 ; GFX6-NEXT: s_nop 3
1068 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1069 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1070 ; GFX6-NEXT: ; return to shader part epilog
1072 ; GFX7-LABEL: s_buffer_load_imm_bit21:
1074 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
1075 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1076 ; GFX7-NEXT: ; return to shader part epilog
1078 ; GFX8-LABEL: s_buffer_load_imm_bit21:
1080 ; GFX8-NEXT: s_mov_b32 s4, 0x200000
1081 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1082 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1083 ; GFX8-NEXT: ; return to shader part epilog
1085 ; GFX910-LABEL: s_buffer_load_imm_bit21:
1087 ; GFX910-NEXT: s_mov_b32 s4, 0x200000
1088 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1089 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1090 ; GFX910-NEXT: ; return to shader part epilog
1092 ; GFX11-LABEL: s_buffer_load_imm_bit21:
1094 ; GFX11-NEXT: s_mov_b32 s4, 0x200000
1095 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1096 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1097 ; GFX11-NEXT: ; return to shader part epilog
1099 ; GFX12-LABEL: s_buffer_load_imm_bit21:
1101 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x200000
1102 ; GFX12-NEXT: s_wait_kmcnt 0x0
1103 ; GFX12-NEXT: ; return to shader part epilog
1104 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
; Constant offset with only bit 20 set (1048576 = 0x100000). GFX7 encodes it in
; the load as 0x40000 (0x100000 >> 2) and GFX12 encodes 0x100000 directly.
; GFX6, GFX8, GFX9/GFX10 and GFX11 move the value into s4 and use s4 as the
; offset operand.
1108 define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
1109 ; GFX6-LABEL: s_buffer_load_imm_bit20:
1111 ; GFX6-NEXT: s_mov_b32 s4, 0x100000
1112 ; GFX6-NEXT: s_nop 3
1113 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1114 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1115 ; GFX6-NEXT: ; return to shader part epilog
1117 ; GFX7-LABEL: s_buffer_load_imm_bit20:
1119 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000
1120 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1121 ; GFX7-NEXT: ; return to shader part epilog
1123 ; GFX8-LABEL: s_buffer_load_imm_bit20:
1125 ; GFX8-NEXT: s_mov_b32 s4, 0x100000
1126 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1127 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1128 ; GFX8-NEXT: ; return to shader part epilog
1130 ; GFX910-LABEL: s_buffer_load_imm_bit20:
1132 ; GFX910-NEXT: s_mov_b32 s4, 0x100000
1133 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1134 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1135 ; GFX910-NEXT: ; return to shader part epilog
1137 ; GFX11-LABEL: s_buffer_load_imm_bit20:
1139 ; GFX11-NEXT: s_mov_b32 s4, 0x100000
1140 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1141 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1142 ; GFX11-NEXT: ; return to shader part epilog
1144 ; GFX12-LABEL: s_buffer_load_imm_bit20:
1146 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100000
1147 ; GFX12-NEXT: s_wait_kmcnt 0x0
1148 ; GFX12-NEXT: ; return to shader part epilog
1149 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
; Constant offset -1048576 (0xfff00000). Only GFX7 encodes it in the load, as
; 0x3ffc0000 (0xfff00000 >> 2). All other targets, including GFX12, move
; 0xfff00000 into s4 and use s4 as the offset operand.
1153 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
1154 ; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
1156 ; GFX6-NEXT: s_mov_b32 s4, 0xfff00000
1157 ; GFX6-NEXT: s_nop 3
1158 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1159 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1160 ; GFX6-NEXT: ; return to shader part epilog
1162 ; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
1164 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000
1165 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1166 ; GFX7-NEXT: ; return to shader part epilog
1168 ; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
1170 ; GFX8-NEXT: s_mov_b32 s4, 0xfff00000
1171 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1172 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1173 ; GFX8-NEXT: ; return to shader part epilog
1175 ; GFX910-LABEL: s_buffer_load_imm_neg_bit20:
1177 ; GFX910-NEXT: s_mov_b32 s4, 0xfff00000
1178 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1179 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1180 ; GFX910-NEXT: ; return to shader part epilog
1182 ; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
1184 ; GFX11-NEXT: s_mov_b32 s4, 0xfff00000
1185 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1186 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1187 ; GFX11-NEXT: ; return to shader part epilog
1189 ; GFX12-LABEL: s_buffer_load_imm_neg_bit20:
1191 ; GFX12-NEXT: s_mov_b32 s4, 0xfff00000
1192 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1193 ; GFX12-NEXT: s_wait_kmcnt 0x0
1194 ; GFX12-NEXT: ; return to shader part epilog
1195 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
; Constant offset with only bit 19 set (524288 = 0x80000). GFX6 is the only
; target that moves the value into s4; GFX7 encodes it in the load as 0x20000
; (0x80000 >> 2), and GFX8 and later encode 0x80000 directly.
1199 define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
1200 ; GFX6-LABEL: s_buffer_load_imm_bit19:
1202 ; GFX6-NEXT: s_mov_b32 s4, 0x80000
1203 ; GFX6-NEXT: s_nop 3
1204 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1205 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1206 ; GFX6-NEXT: ; return to shader part epilog
1208 ; GFX7-LABEL: s_buffer_load_imm_bit19:
1210 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000
1211 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1212 ; GFX7-NEXT: ; return to shader part epilog
1214 ; GFX8910-LABEL: s_buffer_load_imm_bit19:
1216 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
1217 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1218 ; GFX8910-NEXT: ; return to shader part epilog
1220 ; GFX11-LABEL: s_buffer_load_imm_bit19:
1222 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
1223 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1224 ; GFX11-NEXT: ; return to shader part epilog
1226 ; GFX12-LABEL: s_buffer_load_imm_bit19:
1228 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
1229 ; GFX12-NEXT: s_wait_kmcnt 0x0
1230 ; GFX12-NEXT: ; return to shader part epilog
1231 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
; Constant offset -524288 (0xfff80000). Only GFX7 encodes it in the load, as
; 0x3ffe0000 (0xfff80000 >> 2). All other targets move 0xfff80000 into s4 and
; use s4 as the offset operand.
1235 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
1236 ; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
1238 ; GFX6-NEXT: s_mov_b32 s4, 0xfff80000
1239 ; GFX6-NEXT: s_nop 3
1240 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1241 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1242 ; GFX6-NEXT: ; return to shader part epilog
1244 ; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
1246 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000
1247 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1248 ; GFX7-NEXT: ; return to shader part epilog
1250 ; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
1252 ; GFX8-NEXT: s_mov_b32 s4, 0xfff80000
1253 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1254 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1255 ; GFX8-NEXT: ; return to shader part epilog
1257 ; GFX910-LABEL: s_buffer_load_imm_neg_bit19:
1259 ; GFX910-NEXT: s_mov_b32 s4, 0xfff80000
1260 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1261 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1262 ; GFX910-NEXT: ; return to shader part epilog
1264 ; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
1266 ; GFX11-NEXT: s_mov_b32 s4, 0xfff80000
1267 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1268 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1269 ; GFX11-NEXT: ; return to shader part epilog
1271 ; GFX12-LABEL: s_buffer_load_imm_neg_bit19:
1273 ; GFX12-NEXT: s_mov_b32 s4, 0xfff80000
1274 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1275 ; GFX12-NEXT: s_wait_kmcnt 0x0
1276 ; GFX12-NEXT: ; return to shader part epilog
1277 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
; Constant offset 255, which is not a multiple of 4. GFX6 and GFX7 move 0xff
; into s4 and use s4 as the offset operand; GFX8 and later encode 0xff directly
; in the load.
1281 define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
1282 ; GFX6-LABEL: s_buffer_load_imm_255:
1284 ; GFX6-NEXT: s_movk_i32 s4, 0xff
1285 ; GFX6-NEXT: s_nop 3
1286 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1287 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1288 ; GFX6-NEXT: ; return to shader part epilog
1290 ; GFX7-LABEL: s_buffer_load_imm_255:
1292 ; GFX7-NEXT: s_movk_i32 s4, 0xff
1293 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1294 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1295 ; GFX7-NEXT: ; return to shader part epilog
1297 ; GFX8910-LABEL: s_buffer_load_imm_255:
1299 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1300 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1301 ; GFX8910-NEXT: ; return to shader part epilog
1303 ; GFX11-LABEL: s_buffer_load_imm_255:
1305 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
1306 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1307 ; GFX11-NEXT: ; return to shader part epilog
1309 ; GFX12-LABEL: s_buffer_load_imm_255:
1311 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
1312 ; GFX12-NEXT: s_wait_kmcnt 0x0
1313 ; GFX12-NEXT: ; return to shader part epilog
1314 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
; Constant offset 256. Every target encodes it directly in the load: GFX6 and
; GFX7 as 0x40 (256 / 4), GFX8 and later as 0x100.
1318 define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
1319 ; GFX67-LABEL: s_buffer_load_imm_256:
1321 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40
1322 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1323 ; GFX67-NEXT: ; return to shader part epilog
1325 ; GFX8910-LABEL: s_buffer_load_imm_256:
1327 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1328 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1329 ; GFX8910-NEXT: ; return to shader part epilog
1331 ; GFX11-LABEL: s_buffer_load_imm_256:
1333 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
1334 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1335 ; GFX11-NEXT: ; return to shader part epilog
1337 ; GFX12-LABEL: s_buffer_load_imm_256:
1339 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
1340 ; GFX12-NEXT: s_wait_kmcnt 0x0
1341 ; GFX12-NEXT: ; return to shader part epilog
1342 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
; Constant offset 1016. Every target encodes it directly in the load: GFX6 and
; GFX7 as 0xfe (1016 / 4), GFX8 and later as 0x3f8.
1346 define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
1347 ; GFX67-LABEL: s_buffer_load_imm_1016:
1349 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe
1350 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1351 ; GFX67-NEXT: ; return to shader part epilog
1353 ; GFX8910-LABEL: s_buffer_load_imm_1016:
1355 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8
1356 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1357 ; GFX8910-NEXT: ; return to shader part epilog
1359 ; GFX11-LABEL: s_buffer_load_imm_1016:
1361 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
1362 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1363 ; GFX11-NEXT: ; return to shader part epilog
1365 ; GFX12-LABEL: s_buffer_load_imm_1016:
1367 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
1368 ; GFX12-NEXT: s_wait_kmcnt 0x0
1369 ; GFX12-NEXT: ; return to shader part epilog
1370 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
; Constant offset 1020. Every target encodes it directly in the load: GFX6 and
; GFX7 as 0xff (1020 / 4), GFX8 and later as 0x3fc.
1374 define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
1375 ; GFX67-LABEL: s_buffer_load_imm_1020:
1377 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1378 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1379 ; GFX67-NEXT: ; return to shader part epilog
1381 ; GFX8910-LABEL: s_buffer_load_imm_1020:
1383 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc
1384 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1385 ; GFX8910-NEXT: ; return to shader part epilog
1387 ; GFX11-LABEL: s_buffer_load_imm_1020:
1389 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
1390 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1391 ; GFX11-NEXT: ; return to shader part epilog
1393 ; GFX12-LABEL: s_buffer_load_imm_1020:
1395 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
1396 ; GFX12-NEXT: s_wait_kmcnt 0x0
1397 ; GFX12-NEXT: ; return to shader part epilog
1398 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
; Constant offset 1021, which is not a multiple of 4. GFX6 and GFX7 move 0x3fd
; into s4 and use s4 as the offset operand; GFX8 and later encode 0x3fd
; directly in the load.
1402 define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
1403 ; GFX6-LABEL: s_buffer_load_imm_1021:
1405 ; GFX6-NEXT: s_movk_i32 s4, 0x3fd
1406 ; GFX6-NEXT: s_nop 3
1407 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1408 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1409 ; GFX6-NEXT: ; return to shader part epilog
1411 ; GFX7-LABEL: s_buffer_load_imm_1021:
1413 ; GFX7-NEXT: s_movk_i32 s4, 0x3fd
1414 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1415 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1416 ; GFX7-NEXT: ; return to shader part epilog
1418 ; GFX8910-LABEL: s_buffer_load_imm_1021:
1420 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd
1421 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1422 ; GFX8910-NEXT: ; return to shader part epilog
1424 ; GFX11-LABEL: s_buffer_load_imm_1021:
1426 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
1427 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1428 ; GFX11-NEXT: ; return to shader part epilog
1430 ; GFX12-LABEL: s_buffer_load_imm_1021:
1432 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
1433 ; GFX12-NEXT: s_wait_kmcnt 0x0
1434 ; GFX12-NEXT: ; return to shader part epilog
1435 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
; Constant offset 1024. GFX6 moves 0x400 into s4 and uses s4 as the offset
; operand; GFX7 encodes the constant in the load as 0x100 (1024 / 4); GFX8 and
; later encode 0x400 directly.
1439 define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
1440 ; GFX6-LABEL: s_buffer_load_imm_1024:
1442 ; GFX6-NEXT: s_movk_i32 s4, 0x400
1443 ; GFX6-NEXT: s_nop 3
1444 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1445 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1446 ; GFX6-NEXT: ; return to shader part epilog
1448 ; GFX7-LABEL: s_buffer_load_imm_1024:
1450 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1451 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1452 ; GFX7-NEXT: ; return to shader part epilog
1454 ; GFX8910-LABEL: s_buffer_load_imm_1024:
1456 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x400
1457 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1458 ; GFX8910-NEXT: ; return to shader part epilog
1460 ; GFX11-LABEL: s_buffer_load_imm_1024:
1462 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
1463 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1464 ; GFX11-NEXT: ; return to shader part epilog
1466 ; GFX12-LABEL: s_buffer_load_imm_1024:
1468 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
1469 ; GFX12-NEXT: s_wait_kmcnt 0x0
1470 ; GFX12-NEXT: ; return to shader part epilog
1471 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
; Constant offset 1025, which is not a multiple of 4. GFX6 and GFX7 move 0x401
; into s4 and use s4 as the offset operand; GFX8 and later encode 0x401
; directly in the load.
1475 define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
1476 ; GFX6-LABEL: s_buffer_load_imm_1025:
1478 ; GFX6-NEXT: s_movk_i32 s4, 0x401
1479 ; GFX6-NEXT: s_nop 3
1480 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1481 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1482 ; GFX6-NEXT: ; return to shader part epilog
1484 ; GFX7-LABEL: s_buffer_load_imm_1025:
1486 ; GFX7-NEXT: s_movk_i32 s4, 0x401
1487 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1488 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1489 ; GFX7-NEXT: ; return to shader part epilog
1491 ; GFX8910-LABEL: s_buffer_load_imm_1025:
1493 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x401
1494 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1495 ; GFX8910-NEXT: ; return to shader part epilog
1497 ; GFX11-LABEL: s_buffer_load_imm_1025:
1499 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
1500 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1501 ; GFX11-NEXT: ; return to shader part epilog
1503 ; GFX12-LABEL: s_buffer_load_imm_1025:
1505 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
1506 ; GFX12-NEXT: s_wait_kmcnt 0x0
1507 ; GFX12-NEXT: ; return to shader part epilog
1508 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
; Constant offset 1028 (0x404), a multiple of 4. Expected: GFX6 moves 0x404
; into s4 and uses s4 as the offset operand; GFX7 encodes the constant in the
; load as 0x101 (1028 / 4); GFX8 and later encode 0x404 directly.
; The intrinsic call previously passed 1024, which made this test identical to
; s_buffer_load_imm_1024 and left the 1028 case untested.
; NOTE(review): the check lines were adjusted by hand to match the new offset;
; rerun utils/update_llc_test_checks.py to confirm them against llc output.
1512 define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
1513 ; GFX6-LABEL: s_buffer_load_imm_1028:
1515 ; GFX6-NEXT: s_movk_i32 s4, 0x404
1516 ; GFX6-NEXT: s_nop 3
1517 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1518 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1519 ; GFX6-NEXT: ; return to shader part epilog
1521 ; GFX7-LABEL: s_buffer_load_imm_1028:
1523 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x101
1524 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1525 ; GFX7-NEXT: ; return to shader part epilog
1527 ; GFX8910-LABEL: s_buffer_load_imm_1028:
1529 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x404
1530 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1531 ; GFX8910-NEXT: ; return to shader part epilog
1533 ; GFX11-LABEL: s_buffer_load_imm_1028:
1535 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
1536 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1537 ; GFX11-NEXT: ; return to shader part epilog
1539 ; GFX12-LABEL: s_buffer_load_imm_1028:
1541 ; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
1542 ; GFX12-NEXT: s_wait_kmcnt 0x0
1543 ; GFX12-NEXT: ; return to shader part epilog
1544 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
; Declarations of the AMDGPU intrinsics called by the tests: the export
; intrinsic and the scalar buffer load in its i32, v2i32, v3i32 and v4i32 result
; widths. Each load takes a <4 x i32> plus two i32 operands.
; NOTE(review): the vector-width variants are presumably exercised by tests
; earlier in the file - confirm before removing any of them.
1548 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
1549 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
1550 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
1551 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
1552 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
1554 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: