1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
3 ; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
4 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
5 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
6 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
7 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
; Single-dword s.buffer.load with the constant offset 4 on an inreg descriptor.
; The checks expect the offset to be folded into the scalar load as an
; immediate: 0x1 under the GFX67 prefix and 0x4 under the GFX8910 and GFX11
; prefixes. The loaded i32 is bitcast to float and passed to the export.
9 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
10 ; GFX67-LABEL: s_buffer_load_imm:
11 ; GFX67: ; %bb.0: ; %main_body
12 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1
13 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
14 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
15 ; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm
16 ; GFX67-NEXT: s_endpgm
18 ; GFX8910-LABEL: s_buffer_load_imm:
19 ; GFX8910: ; %bb.0: ; %main_body
20 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x4
21 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
22 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
23 ; GFX8910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
24 ; GFX8910-NEXT: s_endpgm
26 ; GFX11-LABEL: s_buffer_load_imm:
27 ; GFX11: ; %bb.0: ; %main_body
28 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
29 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
30 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
31 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
32 ; GFX11-NEXT: s_endpgm
34 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
35 %bitcast = bitcast i32 %load to float
36 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Single-dword s.buffer.load whose offset is the inreg argument %index.
; The checks expect s4 to be used directly as the offset operand of the scalar
; load; the GFX910 and GFX11 prefixes additionally print "offset:0x0".
40 define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
41 ; GFX678-LABEL: s_buffer_load_index:
42 ; GFX678: ; %bb.0: ; %main_body
43 ; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4
44 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
45 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
46 ; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
47 ; GFX678-NEXT: s_endpgm
49 ; GFX910-LABEL: s_buffer_load_index:
50 ; GFX910: ; %bb.0: ; %main_body
51 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
52 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
53 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
54 ; GFX910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
55 ; GFX910-NEXT: s_endpgm
57 ; GFX11-LABEL: s_buffer_load_index:
58 ; GFX11: ; %bb.0: ; %main_body
59 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
60 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
61 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
62 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
63 ; GFX11-NEXT: s_endpgm
65 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
66 %bitcast = bitcast i32 %load to float
67 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; Same IR as s_buffer_load_index, except that %index is not marked inreg.
; The checks expect a buffer_load_dword (buffer_load_b32 under the GFX11
; prefix) that takes v0 as an "offen" offset instead of an s_buffer_load, and
; a wait on vmcnt rather than lgkmcnt.
71 define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
72 ; GFX678910-LABEL: s_buffer_load_index_divergent:
73 ; GFX678910: ; %bb.0: ; %main_body
74 ; GFX678910-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
75 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
76 ; GFX678910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
77 ; GFX678910-NEXT: s_endpgm
79 ; GFX11-LABEL: s_buffer_load_index_divergent:
80 ; GFX11: ; %bb.0: ; %main_body
81 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
82 ; GFX11-NEXT: s_waitcnt vmcnt(0)
83 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
84 ; GFX11-NEXT: s_endpgm
86 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
87 %bitcast = bitcast i32 %load to float
88 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; <2 x i32> s.buffer.load with the constant offset 64.
; The checks expect one s_buffer_load_dwordx2 (s_buffer_load_b64 under the
; GFX11 prefix) with the offset as an immediate: 0x10 under the GFX67 prefix
; and 0x40 under the GFX8910 and GFX11 prefixes. Both elements are exported.
92 define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
93 ; GFX67-LABEL: s_buffer_loadx2_imm:
94 ; GFX67: ; %bb.0: ; %main_body
95 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
96 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
97 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
98 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
99 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
100 ; GFX67-NEXT: s_endpgm
102 ; GFX8910-LABEL: s_buffer_loadx2_imm:
103 ; GFX8910: ; %bb.0: ; %main_body
104 ; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
105 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
106 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
107 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
108 ; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
109 ; GFX8910-NEXT: s_endpgm
111 ; GFX11-LABEL: s_buffer_loadx2_imm:
112 ; GFX11: ; %bb.0: ; %main_body
113 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
114 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
116 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
117 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
118 ; GFX11-NEXT: s_endpgm
120 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
121 %bitcast = bitcast <2 x i32> %load to <2 x float>
122 %x = extractelement <2 x float> %bitcast, i32 0
123 %y = extractelement <2 x float> %bitcast, i32 1
124 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; <2 x i32> s.buffer.load whose offset is the inreg argument %index.
; The checks expect a two-dword scalar load with s4 as the offset operand; the
; GFX910 and GFX11 prefixes additionally print "offset:0x0".
128 define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
129 ; GFX678-LABEL: s_buffer_loadx2_index:
130 ; GFX678: ; %bb.0: ; %main_body
131 ; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4
132 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
133 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
134 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
135 ; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
136 ; GFX678-NEXT: s_endpgm
138 ; GFX910-LABEL: s_buffer_loadx2_index:
139 ; GFX910: ; %bb.0: ; %main_body
140 ; GFX910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0
141 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
142 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
143 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
144 ; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
145 ; GFX910-NEXT: s_endpgm
147 ; GFX11-LABEL: s_buffer_loadx2_index:
148 ; GFX11: ; %bb.0: ; %main_body
149 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
150 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
151 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
152 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
153 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
154 ; GFX11-NEXT: s_endpgm
156 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
157 %bitcast = bitcast <2 x i32> %load to <2 x float>
158 %x = extractelement <2 x float> %bitcast, i32 0
159 %y = extractelement <2 x float> %bitcast, i32 1
160 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; <2 x i32> s.buffer.load whose offset %index is not marked inreg.
; The checks expect buffer_load_dwordx2 (buffer_load_b64 under the GFX11
; prefix) with v0 as an "offen" offset instead of an s_buffer_load.
164 define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
165 ; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
166 ; GFX678910: ; %bb.0: ; %main_body
167 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
168 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
169 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
170 ; GFX678910-NEXT: s_endpgm
172 ; GFX11-LABEL: s_buffer_loadx2_index_divergent:
173 ; GFX11: ; %bb.0: ; %main_body
174 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
175 ; GFX11-NEXT: s_waitcnt vmcnt(0)
176 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
177 ; GFX11-NEXT: s_endpgm
179 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
180 %bitcast = bitcast <2 x i32> %load to <2 x float>
181 %x = extractelement <2 x float> %bitcast, i32 0
182 %y = extractelement <2 x float> %bitcast, i32 1
183 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; <3 x i32> s.buffer.load with the constant offset 64.
; The checks expect a four-dword scalar load (s_buffer_load_dwordx4, or
; s_buffer_load_b128 under the GFX11 prefix) of which only s0-s2 are copied for
; the export. The immediate is 0x10 under the GFX67 prefix and 0x40 otherwise.
187 define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
188 ; GFX67-LABEL: s_buffer_loadx3_imm:
189 ; GFX67: ; %bb.0: ; %main_body
190 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
191 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
192 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
193 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
194 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
195 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm
196 ; GFX67-NEXT: s_endpgm
198 ; GFX8910-LABEL: s_buffer_loadx3_imm:
199 ; GFX8910: ; %bb.0: ; %main_body
200 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
201 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
202 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
203 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
204 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
205 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
206 ; GFX8910-NEXT: s_endpgm
208 ; GFX11-LABEL: s_buffer_loadx3_imm:
209 ; GFX11: ; %bb.0: ; %main_body
210 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x40
211 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
212 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
213 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
214 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
215 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
216 ; GFX11-NEXT: s_endpgm
218 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
219 %bitcast = bitcast <3 x i32> %load to <3 x float>
220 %x = extractelement <3 x float> %bitcast, i32 0
221 %y = extractelement <3 x float> %bitcast, i32 1
222 %z = extractelement <3 x float> %bitcast, i32 2
223 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; <3 x i32> s.buffer.load whose offset is the inreg argument %index.
; The checks expect a four-dword scalar load with s4 as the offset operand and
; only s0-s2 copied for the export; the GFX910 and GFX11 prefixes additionally
; print "offset:0x0".
227 define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
228 ; GFX678-LABEL: s_buffer_loadx3_index:
229 ; GFX678: ; %bb.0: ; %main_body
230 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
231 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
232 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
233 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
234 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
235 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm
236 ; GFX678-NEXT: s_endpgm
238 ; GFX910-LABEL: s_buffer_loadx3_index:
239 ; GFX910: ; %bb.0: ; %main_body
240 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
241 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
242 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
243 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
244 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
245 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
246 ; GFX910-NEXT: s_endpgm
248 ; GFX11-LABEL: s_buffer_loadx3_index:
249 ; GFX11: ; %bb.0: ; %main_body
250 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
251 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
252 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
253 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
254 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
255 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
256 ; GFX11-NEXT: s_endpgm
258 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
259 %bitcast = bitcast <3 x i32> %load to <3 x float>
260 %x = extractelement <3 x float> %bitcast, i32 0
261 %y = extractelement <3 x float> %bitcast, i32 1
262 %z = extractelement <3 x float> %bitcast, i32 2
263 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; <3 x i32> s.buffer.load whose offset %index is not marked inreg.
; The expected buffer load differs by prefix: buffer_load_dwordx4 under GFX6,
; buffer_load_dwordx3 under GFX78910, and buffer_load_b96 under GFX11. In every
; case only v0-v2 are used by the export.
267 define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
268 ; GFX6-LABEL: s_buffer_loadx3_index_divergent:
269 ; GFX6: ; %bb.0: ; %main_body
270 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
271 ; GFX6-NEXT: s_waitcnt vmcnt(0)
272 ; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm
273 ; GFX6-NEXT: s_endpgm
275 ; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
276 ; GFX78910: ; %bb.0: ; %main_body
277 ; GFX78910-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
278 ; GFX78910-NEXT: s_waitcnt vmcnt(0)
279 ; GFX78910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
280 ; GFX78910-NEXT: s_endpgm
282 ; GFX11-LABEL: s_buffer_loadx3_index_divergent:
283 ; GFX11: ; %bb.0: ; %main_body
284 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
285 ; GFX11-NEXT: s_waitcnt vmcnt(0)
286 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
287 ; GFX11-NEXT: s_endpgm
289 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
290 %bitcast = bitcast <3 x i32> %load to <3 x float>
291 %x = extractelement <3 x float> %bitcast, i32 0
292 %y = extractelement <3 x float> %bitcast, i32 1
293 %z = extractelement <3 x float> %bitcast, i32 2
294 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
; <4 x i32> s.buffer.load with the constant offset 200.
; The checks expect a four-dword scalar load with the offset as an immediate:
; 0x32 under the GFX67 prefix and 0xc8 under the GFX8910 and GFX11 prefixes.
; All four elements are exported.
298 define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
299 ; GFX67-LABEL: s_buffer_loadx4_imm:
300 ; GFX67: ; %bb.0: ; %main_body
301 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
302 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
303 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
304 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
305 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
306 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
307 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
308 ; GFX67-NEXT: s_endpgm
310 ; GFX8910-LABEL: s_buffer_loadx4_imm:
311 ; GFX8910: ; %bb.0: ; %main_body
312 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
313 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
314 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
315 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
316 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
317 ; GFX8910-NEXT: v_mov_b32_e32 v3, s3
318 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
319 ; GFX8910-NEXT: s_endpgm
321 ; GFX11-LABEL: s_buffer_loadx4_imm:
322 ; GFX11: ; %bb.0: ; %main_body
323 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
324 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
325 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
326 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
327 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
328 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
329 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
330 ; GFX11-NEXT: s_endpgm
332 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
333 %bitcast = bitcast <4 x i32> %load to <4 x float>
334 %x = extractelement <4 x float> %bitcast, i32 0
335 %y = extractelement <4 x float> %bitcast, i32 1
336 %z = extractelement <4 x float> %bitcast, i32 2
337 %w = extractelement <4 x float> %bitcast, i32 3
338 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; <4 x i32> s.buffer.load whose offset is the inreg argument %index.
; The checks expect a four-dword scalar load with s4 as the offset operand; the
; GFX910 and GFX11 prefixes additionally print "offset:0x0".
342 define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
343 ; GFX678-LABEL: s_buffer_loadx4_index:
344 ; GFX678: ; %bb.0: ; %main_body
345 ; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
346 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
347 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
348 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
349 ; GFX678-NEXT: v_mov_b32_e32 v2, s2
350 ; GFX678-NEXT: v_mov_b32_e32 v3, s3
351 ; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
352 ; GFX678-NEXT: s_endpgm
354 ; GFX910-LABEL: s_buffer_loadx4_index:
355 ; GFX910: ; %bb.0: ; %main_body
356 ; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
357 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
358 ; GFX910-NEXT: v_mov_b32_e32 v0, s0
359 ; GFX910-NEXT: v_mov_b32_e32 v1, s1
360 ; GFX910-NEXT: v_mov_b32_e32 v2, s2
361 ; GFX910-NEXT: v_mov_b32_e32 v3, s3
362 ; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
363 ; GFX910-NEXT: s_endpgm
365 ; GFX11-LABEL: s_buffer_loadx4_index:
366 ; GFX11: ; %bb.0: ; %main_body
367 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
368 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
369 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
370 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
371 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
372 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
373 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
374 ; GFX11-NEXT: s_endpgm
376 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
377 %bitcast = bitcast <4 x i32> %load to <4 x float>
378 %x = extractelement <4 x float> %bitcast, i32 0
379 %y = extractelement <4 x float> %bitcast, i32 1
380 %z = extractelement <4 x float> %bitcast, i32 2
381 %w = extractelement <4 x float> %bitcast, i32 3
382 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; <4 x i32> s.buffer.load whose offset %index is not marked inreg.
; The checks expect buffer_load_dwordx4 (buffer_load_b128 under the GFX11
; prefix) with v0 as an "offen" offset instead of an s_buffer_load.
386 define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
387 ; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
388 ; GFX678910: ; %bb.0: ; %main_body
389 ; GFX678910-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
390 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
391 ; GFX678910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
392 ; GFX678910-NEXT: s_endpgm
394 ; GFX11-LABEL: s_buffer_loadx4_index_divergent:
395 ; GFX11: ; %bb.0: ; %main_body
396 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
398 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
399 ; GFX11-NEXT: s_endpgm
401 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
402 %bitcast = bitcast <4 x i32> %load to <4 x float>
403 %x = extractelement <4 x float> %bitcast, i32 0
404 %y = extractelement <4 x float> %bitcast, i32 1
405 %z = extractelement <4 x float> %bitcast, i32 2
406 %w = extractelement <4 x float> %bitcast, i32 3
407 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Two single-dword s.buffer.load calls on the same descriptor at the constant
; offsets 4 and 8. The checks expect them to be emitted as one two-dword scalar
; load whose immediate is 0x1 under the GFX67 prefix and 0x4 under the GFX8910
; and GFX11 prefixes.
411 define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
412 ; GFX67-LABEL: s_buffer_load_imm_mergex2:
413 ; GFX67: ; %bb.0: ; %main_body
414 ; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
415 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
416 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
417 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
418 ; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
419 ; GFX67-NEXT: s_endpgm
421 ; GFX8910-LABEL: s_buffer_load_imm_mergex2:
422 ; GFX8910: ; %bb.0: ; %main_body
423 ; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
424 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
425 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
426 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
427 ; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
428 ; GFX8910-NEXT: s_endpgm
430 ; GFX11-LABEL: s_buffer_load_imm_mergex2:
431 ; GFX11: ; %bb.0: ; %main_body
432 ; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
433 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
434 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
435 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
436 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
437 ; GFX11-NEXT: s_endpgm
439 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
440 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
441 %x = bitcast i32 %load0 to float
442 %y = bitcast i32 %load1 to float
443 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
; Four single-dword s.buffer.load calls on the same descriptor at the constant
; offsets 8, 12, 16 and 20. The checks expect them to be emitted as one
; four-dword scalar load whose immediate is 0x2 under the GFX67 prefix and 0x8
; under the GFX8910 and GFX11 prefixes.
447 define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
448 ; GFX67-LABEL: s_buffer_load_imm_mergex4:
449 ; GFX67: ; %bb.0: ; %main_body
450 ; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
451 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
452 ; GFX67-NEXT: v_mov_b32_e32 v0, s0
453 ; GFX67-NEXT: v_mov_b32_e32 v1, s1
454 ; GFX67-NEXT: v_mov_b32_e32 v2, s2
455 ; GFX67-NEXT: v_mov_b32_e32 v3, s3
456 ; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
457 ; GFX67-NEXT: s_endpgm
459 ; GFX8910-LABEL: s_buffer_load_imm_mergex4:
460 ; GFX8910: ; %bb.0: ; %main_body
461 ; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
462 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
463 ; GFX8910-NEXT: v_mov_b32_e32 v0, s0
464 ; GFX8910-NEXT: v_mov_b32_e32 v1, s1
465 ; GFX8910-NEXT: v_mov_b32_e32 v2, s2
466 ; GFX8910-NEXT: v_mov_b32_e32 v3, s3
467 ; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
468 ; GFX8910-NEXT: s_endpgm
470 ; GFX11-LABEL: s_buffer_load_imm_mergex4:
471 ; GFX11: ; %bb.0: ; %main_body
472 ; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
473 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
474 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
475 ; GFX11-NEXT: v_mov_b32_e32 v1, s1
476 ; GFX11-NEXT: v_mov_b32_e32 v2, s2
477 ; GFX11-NEXT: v_mov_b32_e32 v3, s3
478 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
479 ; GFX11-NEXT: s_endpgm
481 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
482 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
483 %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
484 %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
485 %x = bitcast i32 %load0 to float
486 %y = bitcast i32 %load1 to float
487 %z = bitcast i32 %load2 to float
488 %w = bitcast i32 %load3 to float
489 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; External i32 global in address space 1. It is the destination of the store in
; s_buffer_load_index_across_bb.
493 @gv = external addrspace(1) global i32
; The load offset is built in two steps: %index is shifted left by 4 and that
; value is stored to @gv, then bb1 ORs in 8 before the s.buffer.load. %index is
; not inreg, so every prefix expects a buffer load with an "offen" offset.
; The checks expect the OR to stay a separate v_or_b32 and the load to use
; offset 0, so the constant 8 is not folded into the instruction.
; Each target has its own prefix here because the store to @gv is emitted
; differently (buffer_store_dword, flat_store_dword, global_store_dword or
; global_store_b32). The GFX11 checks also expect s_nop 0 and an
; s_sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
495 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
496 ; GFX6-LABEL: s_buffer_load_index_across_bb:
497 ; GFX6: ; %bb.0: ; %main_body
498 ; GFX6-NEXT: s_getpc_b64 s[4:5]
499 ; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
500 ; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
501 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
502 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
503 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
504 ; GFX6-NEXT: s_mov_b32 s6, -1
505 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
506 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
507 ; GFX6-NEXT: s_waitcnt expcnt(0)
508 ; GFX6-NEXT: v_or_b32_e32 v0, 8, v0
509 ; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
510 ; GFX6-NEXT: s_waitcnt vmcnt(0)
511 ; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm
512 ; GFX6-NEXT: s_endpgm
514 ; GFX7-LABEL: s_buffer_load_index_across_bb:
515 ; GFX7: ; %bb.0: ; %main_body
516 ; GFX7-NEXT: s_getpc_b64 s[4:5]
517 ; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
518 ; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
519 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
520 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
521 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
522 ; GFX7-NEXT: s_mov_b32 s6, -1
523 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
524 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
525 ; GFX7-NEXT: v_or_b32_e32 v0, 8, v0
526 ; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
527 ; GFX7-NEXT: s_waitcnt vmcnt(0)
528 ; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm
529 ; GFX7-NEXT: s_endpgm
531 ; GFX8-LABEL: s_buffer_load_index_across_bb:
532 ; GFX8: ; %bb.0: ; %main_body
533 ; GFX8-NEXT: s_getpc_b64 s[4:5]
534 ; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
535 ; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
536 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
537 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
538 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
539 ; GFX8-NEXT: v_mov_b32_e32 v1, s4
540 ; GFX8-NEXT: v_mov_b32_e32 v2, s5
541 ; GFX8-NEXT: flat_store_dword v[1:2], v0
542 ; GFX8-NEXT: v_or_b32_e32 v0, 8, v0
543 ; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
544 ; GFX8-NEXT: s_waitcnt vmcnt(0)
545 ; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
546 ; GFX8-NEXT: s_endpgm
548 ; GFX9-LABEL: s_buffer_load_index_across_bb:
549 ; GFX9: ; %bb.0: ; %main_body
550 ; GFX9-NEXT: s_getpc_b64 s[4:5]
551 ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
552 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
553 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
554 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
555 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
556 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
557 ; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
558 ; GFX9-NEXT: v_or_b32_e32 v0, 8, v0
559 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
560 ; GFX9-NEXT: s_waitcnt vmcnt(0)
561 ; GFX9-NEXT: exp mrt0 v0, v0, v0, v0 done vm
562 ; GFX9-NEXT: s_endpgm
564 ; GFX10-LABEL: s_buffer_load_index_across_bb:
565 ; GFX10: ; %bb.0: ; %main_body
566 ; GFX10-NEXT: s_getpc_b64 s[4:5]
567 ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
568 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
569 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
570 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
571 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
572 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
573 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
574 ; GFX10-NEXT: v_or_b32_e32 v0, 8, v0
575 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
576 ; GFX10-NEXT: s_waitcnt vmcnt(0)
577 ; GFX10-NEXT: exp mrt0 v0, v0, v0, v0 done vm
578 ; GFX10-NEXT: s_endpgm
580 ; GFX11-LABEL: s_buffer_load_index_across_bb:
581 ; GFX11: ; %bb.0: ; %main_body
582 ; GFX11-NEXT: s_getpc_b64 s[4:5]
583 ; GFX11-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
584 ; GFX11-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
585 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
586 ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
587 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
588 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
589 ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
590 ; GFX11-NEXT: v_or_b32_e32 v0, 8, v0
591 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
592 ; GFX11-NEXT: s_waitcnt vmcnt(0)
593 ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
594 ; GFX11-NEXT: s_nop 0
595 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
596 ; GFX11-NEXT: s_endpgm
598 %tmp = shl i32 %index, 4
599 store i32 %tmp, ptr addrspace(1) @gv
602 bb1: ; preds = %main_body
603 %tmp1 = or i32 %tmp, 8
604 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
605 %bitcast = bitcast i32 %load to float
606 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
; %index is shifted left by 4 in the entry block; bb1 then issues two
; single-dword s.buffer.load calls at (%tmp | 8) and ((%tmp | 8) | 4). Unlike
; s_buffer_load_index_across_bb, the shifted value is not stored anywhere.
; The checks expect both loads to become one two-dword buffer load that uses
; the shifted v0 as "offen" offset with the constant 8 folded in as offset:8.
610 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
611 ; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
612 ; GFX678910: ; %bb.0: ; %main_body
613 ; GFX678910-NEXT: v_lshlrev_b32_e32 v0, 4, v0
614 ; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
615 ; GFX678910-NEXT: s_waitcnt vmcnt(0)
616 ; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
617 ; GFX678910-NEXT: s_endpgm
619 ; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
620 ; GFX11: ; %bb.0: ; %main_body
621 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
622 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
623 ; GFX11-NEXT: s_waitcnt vmcnt(0)
624 ; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
625 ; GFX11-NEXT: s_endpgm
627 %tmp = shl i32 %index, 4
630 bb1: ; preds = %main_body
631 %tmp1 = or i32 %tmp, 8
632 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
633 %tmp2 = or i32 %tmp1, 4
634 %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
635 %bitcast = bitcast i32 %load to float
636 %bitcast2 = bitcast i32 %load2 to float
637 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
; Single-dword s.buffer.load with the constant offset -1, returning the i32.
; Every prefix expects the offset to be materialised with s_mov_b32 s4, -1 and
; passed as a register operand; no prefix expects an immediate offset.
641 define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
642 ; GFX6-LABEL: s_buffer_load_imm_neg1:
644 ; GFX6-NEXT: s_mov_b32 s4, -1
646 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
647 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
648 ; GFX6-NEXT: ; return to shader part epilog
650 ; GFX78-LABEL: s_buffer_load_imm_neg1:
652 ; GFX78-NEXT: s_mov_b32 s4, -1
653 ; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4
654 ; GFX78-NEXT: s_waitcnt lgkmcnt(0)
655 ; GFX78-NEXT: ; return to shader part epilog
657 ; GFX910-LABEL: s_buffer_load_imm_neg1:
659 ; GFX910-NEXT: s_mov_b32 s4, -1
660 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
661 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
662 ; GFX910-NEXT: ; return to shader part epilog
664 ; GFX11-LABEL: s_buffer_load_imm_neg1:
666 ; GFX11-NEXT: s_mov_b32 s4, -1
667 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
668 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
669 ; GFX11-NEXT: ; return to shader part epilog
670 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
; Single-dword s.buffer.load with the constant offset -4, returning the i32.
; Only the GFX7 prefix expects an immediate offset (0x3fffffff). The GFX6,
; GFX8, GFX910 and GFX11 prefixes expect s_mov_b32 s4, -4 followed by a load
; that takes s4 as the offset operand.
674 define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
675 ; GFX6-LABEL: s_buffer_load_imm_neg4:
677 ; GFX6-NEXT: s_mov_b32 s4, -4
679 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
680 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
681 ; GFX6-NEXT: ; return to shader part epilog
683 ; GFX7-LABEL: s_buffer_load_imm_neg4:
685 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff
686 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
687 ; GFX7-NEXT: ; return to shader part epilog
689 ; GFX8-LABEL: s_buffer_load_imm_neg4:
691 ; GFX8-NEXT: s_mov_b32 s4, -4
692 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
693 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
694 ; GFX8-NEXT: ; return to shader part epilog
696 ; GFX910-LABEL: s_buffer_load_imm_neg4:
698 ; GFX910-NEXT: s_mov_b32 s4, -4
699 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
700 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
701 ; GFX910-NEXT: ; return to shader part epilog
703 ; GFX11-LABEL: s_buffer_load_imm_neg4:
705 ; GFX11-NEXT: s_mov_b32 s4, -4
706 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
707 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
708 ; GFX11-NEXT: ; return to shader part epilog
709 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
; Single-dword s.buffer.load with the constant offset -8, returning the i32.
; Only the GFX7 prefix expects an immediate offset (0x3ffffffe). The GFX6,
; GFX8, GFX910 and GFX11 prefixes expect s_mov_b32 s4, -8 followed by a load
; that takes s4 as the offset operand.
713 define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
714 ; GFX6-LABEL: s_buffer_load_imm_neg8:
716 ; GFX6-NEXT: s_mov_b32 s4, -8
718 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
719 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
720 ; GFX6-NEXT: ; return to shader part epilog
722 ; GFX7-LABEL: s_buffer_load_imm_neg8:
724 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe
725 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
726 ; GFX7-NEXT: ; return to shader part epilog
728 ; GFX8-LABEL: s_buffer_load_imm_neg8:
730 ; GFX8-NEXT: s_mov_b32 s4, -8
731 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
732 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
733 ; GFX8-NEXT: ; return to shader part epilog
735 ; GFX910-LABEL: s_buffer_load_imm_neg8:
737 ; GFX910-NEXT: s_mov_b32 s4, -8
738 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
739 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
740 ; GFX910-NEXT: ; return to shader part epilog
742 ; GFX11-LABEL: s_buffer_load_imm_neg8:
744 ; GFX11-NEXT: s_mov_b32 s4, -8
745 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
746 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
747 ; GFX11-NEXT: ; return to shader part epilog
748 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
; Single-dword s.buffer.load with the constant offset -2147483648 (only bit 31
; set), returning the i32.
; Only the GFX7 prefix expects an immediate offset (0x20000000). The other
; prefixes expect the constant to be built with s_brev_b32 s4, 1 and passed as
; a register operand.
752 define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
753 ; GFX6-LABEL: s_buffer_load_imm_bit31:
755 ; GFX6-NEXT: s_brev_b32 s4, 1
757 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
758 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
759 ; GFX6-NEXT: ; return to shader part epilog
761 ; GFX7-LABEL: s_buffer_load_imm_bit31:
763 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000
764 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
765 ; GFX7-NEXT: ; return to shader part epilog
767 ; GFX8-LABEL: s_buffer_load_imm_bit31:
769 ; GFX8-NEXT: s_brev_b32 s4, 1
770 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
771 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
772 ; GFX8-NEXT: ; return to shader part epilog
774 ; GFX910-LABEL: s_buffer_load_imm_bit31:
776 ; GFX910-NEXT: s_brev_b32 s4, 1
777 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
778 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
779 ; GFX910-NEXT: ; return to shader part epilog
781 ; GFX11-LABEL: s_buffer_load_imm_bit31:
783 ; GFX11-NEXT: s_brev_b32 s4, 1
784 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
785 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
786 ; GFX11-NEXT: ; return to shader part epilog
787 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
; Single-dword s.buffer.load with the constant offset 1073741824 (only bit 30
; set), returning the i32.
; Only the GFX7 prefix expects an immediate offset (0x10000000). The other
; prefixes expect s_mov_b32 s4, 2.0 (the constant printed as an inline float
; operand) and a load that takes s4 as the offset operand.
791 define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
792 ; GFX6-LABEL: s_buffer_load_imm_bit30:
794 ; GFX6-NEXT: s_mov_b32 s4, 2.0
796 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
797 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
798 ; GFX6-NEXT: ; return to shader part epilog
800 ; GFX7-LABEL: s_buffer_load_imm_bit30:
802 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000
803 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
804 ; GFX7-NEXT: ; return to shader part epilog
806 ; GFX8-LABEL: s_buffer_load_imm_bit30:
808 ; GFX8-NEXT: s_mov_b32 s4, 2.0
809 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
810 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
811 ; GFX8-NEXT: ; return to shader part epilog
813 ; GFX910-LABEL: s_buffer_load_imm_bit30:
815 ; GFX910-NEXT: s_mov_b32 s4, 2.0
816 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
817 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
818 ; GFX910-NEXT: ; return to shader part epilog
820 ; GFX11-LABEL: s_buffer_load_imm_bit30:
822 ; GFX11-NEXT: s_mov_b32 s4, 2.0
823 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
824 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
825 ; GFX11-NEXT: ; return to shader part epilog
826 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
; Offset 0x20000000 (only bit 29 set). The checks expect the offset in an SGPR (s_brev_b32 of 4)
; on GFX6, GFX8 and GFX9 and later, and a 32-bit literal of 0x8000000 (the byte offset divided
; by 4) on GFX7. GFX6 also expects `s_nop 3` between the SGPR write and the load, as in the
; other GFX6 SGPR-offset checks of this file.
830 define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
831 ; GFX6-LABEL: s_buffer_load_imm_bit29:
833 ; GFX6-NEXT: s_brev_b32 s4, 4
834 ; GFX6-NEXT: s_nop 3
835 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
836 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
837 ; GFX6-NEXT: ; return to shader part epilog
839 ; GFX7-LABEL: s_buffer_load_imm_bit29:
841 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000
842 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
843 ; GFX7-NEXT: ; return to shader part epilog
845 ; GFX8-LABEL: s_buffer_load_imm_bit29:
847 ; GFX8-NEXT: s_brev_b32 s4, 4
848 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
849 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
850 ; GFX8-NEXT: ; return to shader part epilog
852 ; GFX910-LABEL: s_buffer_load_imm_bit29:
854 ; GFX910-NEXT: s_brev_b32 s4, 4
855 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
856 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
857 ; GFX910-NEXT: ; return to shader part epilog
859 ; GFX11-LABEL: s_buffer_load_imm_bit29:
861 ; GFX11-NEXT: s_brev_b32 s4, 4
862 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
863 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
864 ; GFX11-NEXT: ; return to shader part epilog
865 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
; Offset 0x200000 (only bit 21 set). The checks expect the offset in an SGPR on GFX6, GFX8 and
; GFX9 and later, and a 32-bit literal of 0x80000 (the byte offset divided by 4) on GFX7.
; GFX6 also expects `s_nop 3` between the SGPR write and the load, as in the other GFX6
; SGPR-offset checks of this file.
869 define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
870 ; GFX6-LABEL: s_buffer_load_imm_bit21:
872 ; GFX6-NEXT: s_mov_b32 s4, 0x200000
873 ; GFX6-NEXT: s_nop 3
874 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
875 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
876 ; GFX6-NEXT: ; return to shader part epilog
878 ; GFX7-LABEL: s_buffer_load_imm_bit21:
880 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
881 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
882 ; GFX7-NEXT: ; return to shader part epilog
884 ; GFX8-LABEL: s_buffer_load_imm_bit21:
886 ; GFX8-NEXT: s_mov_b32 s4, 0x200000
887 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
888 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
889 ; GFX8-NEXT: ; return to shader part epilog
891 ; GFX910-LABEL: s_buffer_load_imm_bit21:
893 ; GFX910-NEXT: s_mov_b32 s4, 0x200000
894 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
895 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
896 ; GFX910-NEXT: ; return to shader part epilog
898 ; GFX11-LABEL: s_buffer_load_imm_bit21:
900 ; GFX11-NEXT: s_mov_b32 s4, 0x200000
901 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
902 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
903 ; GFX11-NEXT: ; return to shader part epilog
904 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
; Offset 0x100000 (only bit 20 set). The checks expect the offset in an SGPR on GFX6, GFX8 and
; GFX9 and later, and a 32-bit literal of 0x40000 (the byte offset divided by 4) on GFX7.
; GFX6 also expects `s_nop 3` between the SGPR write and the load, as in the other GFX6
; SGPR-offset checks of this file.
908 define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
909 ; GFX6-LABEL: s_buffer_load_imm_bit20:
911 ; GFX6-NEXT: s_mov_b32 s4, 0x100000
912 ; GFX6-NEXT: s_nop 3
913 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
914 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
915 ; GFX6-NEXT: ; return to shader part epilog
917 ; GFX7-LABEL: s_buffer_load_imm_bit20:
919 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000
920 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
921 ; GFX7-NEXT: ; return to shader part epilog
923 ; GFX8-LABEL: s_buffer_load_imm_bit20:
925 ; GFX8-NEXT: s_mov_b32 s4, 0x100000
926 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
927 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
928 ; GFX8-NEXT: ; return to shader part epilog
930 ; GFX910-LABEL: s_buffer_load_imm_bit20:
932 ; GFX910-NEXT: s_mov_b32 s4, 0x100000
933 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
934 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
935 ; GFX910-NEXT: ; return to shader part epilog
937 ; GFX11-LABEL: s_buffer_load_imm_bit20:
939 ; GFX11-NEXT: s_mov_b32 s4, 0x100000
940 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
941 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
942 ; GFX11-NEXT: ; return to shader part epilog
943 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
; Offset -1048576 (0xfff00000). The checks expect the offset in an SGPR on GFX6, GFX8 and GFX9
; and later, and a 32-bit literal of 0x3ffc0000 (the unsigned byte offset divided by 4) on GFX7.
; GFX6 also expects `s_nop 3` between the SGPR write and the load, as in the other GFX6
; SGPR-offset checks of this file.
947 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
948 ; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
950 ; GFX6-NEXT: s_mov_b32 s4, 0xfff00000
951 ; GFX6-NEXT: s_nop 3
952 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
953 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
954 ; GFX6-NEXT: ; return to shader part epilog
956 ; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
958 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000
959 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
960 ; GFX7-NEXT: ; return to shader part epilog
962 ; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
964 ; GFX8-NEXT: s_mov_b32 s4, 0xfff00000
965 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
966 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
967 ; GFX8-NEXT: ; return to shader part epilog
969 ; GFX910-LABEL: s_buffer_load_imm_neg_bit20:
971 ; GFX910-NEXT: s_mov_b32 s4, 0xfff00000
972 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
973 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
974 ; GFX910-NEXT: ; return to shader part epilog
976 ; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
978 ; GFX11-NEXT: s_mov_b32 s4, 0xfff00000
979 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
980 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
981 ; GFX11-NEXT: ; return to shader part epilog
982 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
; Offset 0x80000 (only bit 19 set). The checks expect the offset in an SGPR on GFX6, a 32-bit
; literal of 0x20000 (the byte offset divided by 4) on GFX7, and the byte offset 0x80000 as an
; immediate on GFX8 and later. GFX6 also expects `s_nop 3` between the SGPR write and the
; load, as in the other GFX6 SGPR-offset checks of this file.
986 define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
987 ; GFX6-LABEL: s_buffer_load_imm_bit19:
989 ; GFX6-NEXT: s_mov_b32 s4, 0x80000
990 ; GFX6-NEXT: s_nop 3
991 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
992 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
993 ; GFX6-NEXT: ; return to shader part epilog
995 ; GFX7-LABEL: s_buffer_load_imm_bit19:
997 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000
998 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
999 ; GFX7-NEXT: ; return to shader part epilog
1001 ; GFX8910-LABEL: s_buffer_load_imm_bit19:
1003 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
1004 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1005 ; GFX8910-NEXT: ; return to shader part epilog
1007 ; GFX11-LABEL: s_buffer_load_imm_bit19:
1009 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
1010 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1011 ; GFX11-NEXT: ; return to shader part epilog
1012 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
; Offset -524288 (0xfff80000). The checks expect the offset in an SGPR on GFX6 (followed by
; `s_nop 3`), GFX8 and GFX9 and later, and a 32-bit literal of 0x3ffe0000 (the unsigned byte
; offset divided by 4) on GFX7.
1016 define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
1017 ; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
1019 ; GFX6-NEXT: s_mov_b32 s4, 0xfff80000
1020 ; GFX6-NEXT: s_nop 3
1021 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1022 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1023 ; GFX6-NEXT: ; return to shader part epilog
1025 ; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
1027 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000
1028 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1029 ; GFX7-NEXT: ; return to shader part epilog
1031 ; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
1033 ; GFX8-NEXT: s_mov_b32 s4, 0xfff80000
1034 ; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
1035 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1036 ; GFX8-NEXT: ; return to shader part epilog
1038 ; GFX910-LABEL: s_buffer_load_imm_neg_bit19:
1040 ; GFX910-NEXT: s_mov_b32 s4, 0xfff80000
1041 ; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1042 ; GFX910-NEXT: s_waitcnt lgkmcnt(0)
1043 ; GFX910-NEXT: ; return to shader part epilog
1045 ; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
1047 ; GFX11-NEXT: s_mov_b32 s4, 0xfff80000
1048 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1049 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1050 ; GFX11-NEXT: ; return to shader part epilog
1051 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
; Offset 255 (not a multiple of 4). The checks expect the offset in an SGPR (s_movk_i32) on
; GFX6 and GFX7, with `s_nop 3` before the load on GFX6 only, and the immediate 0xff on GFX8
; and later.
1055 define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
1056 ; GFX6-LABEL: s_buffer_load_imm_255:
1058 ; GFX6-NEXT: s_movk_i32 s4, 0xff
1059 ; GFX6-NEXT: s_nop 3
1060 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1061 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1062 ; GFX6-NEXT: ; return to shader part epilog
1064 ; GFX7-LABEL: s_buffer_load_imm_255:
1066 ; GFX7-NEXT: s_movk_i32 s4, 0xff
1067 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1068 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1069 ; GFX7-NEXT: ; return to shader part epilog
1071 ; GFX8910-LABEL: s_buffer_load_imm_255:
1073 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1074 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1075 ; GFX8910-NEXT: ; return to shader part epilog
1077 ; GFX11-LABEL: s_buffer_load_imm_255:
1079 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
1080 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1081 ; GFX11-NEXT: ; return to shader part epilog
1082 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
; Offset 256. The checks expect the immediate 0x40 (256 / 4) on GFX6 and GFX7, and the byte
; offset 0x100 as an immediate on GFX8 and later.
1086 define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
1087 ; GFX67-LABEL: s_buffer_load_imm_256:
1089 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40
1090 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1091 ; GFX67-NEXT: ; return to shader part epilog
1093 ; GFX8910-LABEL: s_buffer_load_imm_256:
1095 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1096 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1097 ; GFX8910-NEXT: ; return to shader part epilog
1099 ; GFX11-LABEL: s_buffer_load_imm_256:
1101 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
1102 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1103 ; GFX11-NEXT: ; return to shader part epilog
1104 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
; Offset 1016. The checks expect the immediate 0xfe (1016 / 4) on GFX6 and GFX7, and the byte
; offset 0x3f8 as an immediate on GFX8 and later.
1108 define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
1109 ; GFX67-LABEL: s_buffer_load_imm_1016:
1111 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe
1112 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1113 ; GFX67-NEXT: ; return to shader part epilog
1115 ; GFX8910-LABEL: s_buffer_load_imm_1016:
1117 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8
1118 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1119 ; GFX8910-NEXT: ; return to shader part epilog
1121 ; GFX11-LABEL: s_buffer_load_imm_1016:
1123 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
1124 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1125 ; GFX11-NEXT: ; return to shader part epilog
1126 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
; Offset 1020. The checks expect the immediate 0xff (1020 / 4) on GFX6 and GFX7, and the byte
; offset 0x3fc as an immediate on GFX8 and later.
1130 define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
1131 ; GFX67-LABEL: s_buffer_load_imm_1020:
1133 ; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
1134 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
1135 ; GFX67-NEXT: ; return to shader part epilog
1137 ; GFX8910-LABEL: s_buffer_load_imm_1020:
1139 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc
1140 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1141 ; GFX8910-NEXT: ; return to shader part epilog
1143 ; GFX11-LABEL: s_buffer_load_imm_1020:
1145 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
1146 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1147 ; GFX11-NEXT: ; return to shader part epilog
1148 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
; Offset 1021 (not a multiple of 4). The checks expect the offset in an SGPR (s_movk_i32) on
; GFX6 and GFX7, with `s_nop 3` before the load on GFX6 only, and the immediate 0x3fd on GFX8
; and later.
1152 define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
1153 ; GFX6-LABEL: s_buffer_load_imm_1021:
1155 ; GFX6-NEXT: s_movk_i32 s4, 0x3fd
1156 ; GFX6-NEXT: s_nop 3
1157 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1158 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1159 ; GFX6-NEXT: ; return to shader part epilog
1161 ; GFX7-LABEL: s_buffer_load_imm_1021:
1163 ; GFX7-NEXT: s_movk_i32 s4, 0x3fd
1164 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1165 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1166 ; GFX7-NEXT: ; return to shader part epilog
1168 ; GFX8910-LABEL: s_buffer_load_imm_1021:
1170 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd
1171 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1172 ; GFX8910-NEXT: ; return to shader part epilog
1174 ; GFX11-LABEL: s_buffer_load_imm_1021:
1176 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
1177 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1178 ; GFX11-NEXT: ; return to shader part epilog
1179 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
; Offset 1024. The checks expect the offset in an SGPR (s_movk_i32, then `s_nop 3`) on GFX6,
; a literal of 0x100 (1024 / 4) on GFX7, and the byte offset 0x400 as an immediate on GFX8
; and later.
1183 define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
1184 ; GFX6-LABEL: s_buffer_load_imm_1024:
1186 ; GFX6-NEXT: s_movk_i32 s4, 0x400
1187 ; GFX6-NEXT: s_nop 3
1188 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1189 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1190 ; GFX6-NEXT: ; return to shader part epilog
1192 ; GFX7-LABEL: s_buffer_load_imm_1024:
1194 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
1195 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1196 ; GFX7-NEXT: ; return to shader part epilog
1198 ; GFX8910-LABEL: s_buffer_load_imm_1024:
1200 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x400
1201 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1202 ; GFX8910-NEXT: ; return to shader part epilog
1204 ; GFX11-LABEL: s_buffer_load_imm_1024:
1206 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
1207 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1208 ; GFX11-NEXT: ; return to shader part epilog
1209 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
; Offset 1025 (not a multiple of 4). The checks expect the offset in an SGPR (s_movk_i32) on
; GFX6 and GFX7, with `s_nop 3` before the load on GFX6 only, and the immediate 0x401 on GFX8
; and later.
1213 define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
1214 ; GFX6-LABEL: s_buffer_load_imm_1025:
1216 ; GFX6-NEXT: s_movk_i32 s4, 0x401
1217 ; GFX6-NEXT: s_nop 3
1218 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1219 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1220 ; GFX6-NEXT: ; return to shader part epilog
1222 ; GFX7-LABEL: s_buffer_load_imm_1025:
1224 ; GFX7-NEXT: s_movk_i32 s4, 0x401
1225 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
1226 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1227 ; GFX7-NEXT: ; return to shader part epilog
1229 ; GFX8910-LABEL: s_buffer_load_imm_1025:
1231 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x401
1232 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1233 ; GFX8910-NEXT: ; return to shader part epilog
1235 ; GFX11-LABEL: s_buffer_load_imm_1025:
1237 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
1238 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1239 ; GFX11-NEXT: ; return to shader part epilog
1240 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
; Offset 1028 (a multiple of 4 whose dword index, 257, does not fit in 8 bits). The intrinsic
; operand previously passed 1024, which made this test a duplicate of s_buffer_load_imm_1024.
; Expected offsets were derived by analogy with the 1024 case (SGPR 0x404 on GFX6, literal
; 0x101 = 1028 / 4 on GFX7, immediate 0x404 on GFX8 and later).
; TODO: confirm by re-running utils/update_llc_test_checks.py.
1244 define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
1245 ; GFX6-LABEL: s_buffer_load_imm_1028:
1247 ; GFX6-NEXT: s_movk_i32 s4, 0x404
1248 ; GFX6-NEXT: s_nop 3
1249 ; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
1250 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1251 ; GFX6-NEXT: ; return to shader part epilog
1253 ; GFX7-LABEL: s_buffer_load_imm_1028:
1255 ; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x101
1256 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1257 ; GFX7-NEXT: ; return to shader part epilog
1259 ; GFX8910-LABEL: s_buffer_load_imm_1028:
1261 ; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x404
1262 ; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
1263 ; GFX8910-NEXT: ; return to shader part epilog
1265 ; GFX11-LABEL: s_buffer_load_imm_1028:
1267 ; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
1268 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1269 ; GFX11-NEXT: ; return to shader part epilog
1270 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
; Declarations of the AMDGPU intrinsics called by the tests in this file: the export intrinsic
; and the scalar buffer load returning i32 or a 2-, 3- or 4-element i32 vector.
; NOTE(review): the vector-returning variants are presumably used by tests outside this
; excerpt; confirm before removing any of them.
1274 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
1275 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
1276 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
1277 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
1278 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
1280 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: