1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
3 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
4 ;RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
; buffer_load: three <4 x float> struct.ptr buffer format loads from the same
; inreg resource. The first three i32 operands are 0 in every call; only the
; last i32 operand differs (0, 1, 2). The expected output is a plain load, a
; load carrying the glc modifier and a load carrying the slc modifier, all in
; idxen form with a zeroed VGPR (v8) as the index. For gfx1100 the three loads
; are additionally expected to be grouped under s_clause 0x2.
6 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
7 ; GFX6-LABEL: buffer_load:
8 ; GFX6: ; %bb.0: ; %main_body
9 ; GFX6-NEXT: v_mov_b32_e32 v8, 0
10 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
11 ; GFX6-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
12 ; GFX6-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
13 ; GFX6-NEXT: s_waitcnt vmcnt(0)
14 ; GFX6-NEXT: ; return to shader part epilog
16 ; GFX8PLUS-LABEL: buffer_load:
17 ; GFX8PLUS: ; %bb.0: ; %main_body
18 ; GFX8PLUS-NEXT: v_mov_b32_e32 v8, 0
19 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
20 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
21 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
22 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
23 ; GFX8PLUS-NEXT: ; return to shader part epilog
25 ; GFX11-LABEL: buffer_load:
26 ; GFX11: ; %bb.0: ; %main_body
27 ; GFX11-NEXT: v_mov_b32_e32 v8, 0
28 ; GFX11-NEXT: s_clause 0x2
29 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
30 ; GFX11-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
31 ; GFX11-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
32 ; GFX11-NEXT: s_waitcnt vmcnt(0)
33 ; GFX11-NEXT: ; return to shader part epilog
35 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
36 %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
37 %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
38 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
39 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
40 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
41 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; buffer_load_immoffs: a single <4 x float> format load whose second i32
; operand is the constant 42 (all other i32 operands are 0). Every target is
; expected to fold the constant into the instruction as offset:42 while still
; using idxen with a zeroed index VGPR.
44 define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
45 ; GFX6-LABEL: buffer_load_immoffs:
46 ; GFX6: ; %bb.0: ; %main_body
47 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
48 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
49 ; GFX6-NEXT: s_waitcnt vmcnt(0)
50 ; GFX6-NEXT: ; return to shader part epilog
52 ; GFX8PLUS-LABEL: buffer_load_immoffs:
53 ; GFX8PLUS: ; %bb.0: ; %main_body
54 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
55 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
56 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
57 ; GFX8PLUS-NEXT: ; return to shader part epilog
59 ; GFX11-LABEL: buffer_load_immoffs:
60 ; GFX11: ; %bb.0: ; %main_body
61 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
62 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
63 ; GFX11-NEXT: s_waitcnt vmcnt(0)
64 ; GFX11-NEXT: ; return to shader part epilog
66 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
; buffer_load_immoffs_large: three <4 x float> format loads that combine a
; constant second i32 operand with a constant third i32 operand:
;   %d.0: 4092 and 60     -> expected as inline scalar operand 60, offset:4092
;   %d.1: 4092 and 32764  -> expected via s4 = 0x7ffc, offset:4092
;   %d.2: 4    and 36860  -> expected via s4 = 0x8ffc, offset:4
; The three results are summed with fadd so that all loads stay live. The
; exact interleaving of waits and adds differs per target and is pinned by
; the autogenerated assertions below.
70 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
71 ; GFX6-LABEL: buffer_load_immoffs_large:
72 ; GFX6: ; %bb.0: ; %main_body
73 ; GFX6-NEXT: v_mov_b32_e32 v8, 0
74 ; GFX6-NEXT: s_movk_i32 s4, 0x7ffc
75 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
76 ; GFX6-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
77 ; GFX6-NEXT: s_mov_b32 s4, 0x8ffc
78 ; GFX6-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
79 ; GFX6-NEXT: s_waitcnt vmcnt(1)
80 ; GFX6-NEXT: v_add_f32_e32 v3, v3, v7
81 ; GFX6-NEXT: v_add_f32_e32 v2, v2, v6
82 ; GFX6-NEXT: v_add_f32_e32 v1, v1, v5
83 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v4
84 ; GFX6-NEXT: s_waitcnt vmcnt(0)
85 ; GFX6-NEXT: v_add_f32_e32 v0, v8, v0
86 ; GFX6-NEXT: v_add_f32_e32 v1, v9, v1
87 ; GFX6-NEXT: v_add_f32_e32 v2, v10, v2
88 ; GFX6-NEXT: v_add_f32_e32 v3, v11, v3
89 ; GFX6-NEXT: ; return to shader part epilog
91 ; GFX8PLUS-LABEL: buffer_load_immoffs_large:
92 ; GFX8PLUS: ; %bb.0: ; %main_body
93 ; GFX8PLUS-NEXT: v_mov_b32_e32 v8, 0
94 ; GFX8PLUS-NEXT: s_movk_i32 s4, 0x7ffc
95 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
96 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
97 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0x8ffc
98 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
99 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(1)
100 ; GFX8PLUS-NEXT: v_add_f32_e32 v3, v3, v7
101 ; GFX8PLUS-NEXT: v_add_f32_e32 v2, v2, v6
102 ; GFX8PLUS-NEXT: v_add_f32_e32 v1, v1, v5
103 ; GFX8PLUS-NEXT: v_add_f32_e32 v0, v0, v4
104 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
105 ; GFX8PLUS-NEXT: v_add_f32_e32 v0, v8, v0
106 ; GFX8PLUS-NEXT: v_add_f32_e32 v1, v9, v1
107 ; GFX8PLUS-NEXT: v_add_f32_e32 v2, v10, v2
108 ; GFX8PLUS-NEXT: v_add_f32_e32 v3, v11, v3
109 ; GFX8PLUS-NEXT: ; return to shader part epilog
111 ; GFX11-LABEL: buffer_load_immoffs_large:
112 ; GFX11: ; %bb.0: ; %main_body
113 ; GFX11-NEXT: v_mov_b32_e32 v8, 0
114 ; GFX11-NEXT: s_movk_i32 s4, 0x7ffc
115 ; GFX11-NEXT: s_clause 0x1
116 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
117 ; GFX11-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
118 ; GFX11-NEXT: s_mov_b32 s4, 0x8ffc
119 ; GFX11-NEXT: s_waitcnt vmcnt(0)
120 ; GFX11-NEXT: v_add_f32_e32 v1, v1, v5
121 ; GFX11-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
122 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
123 ; GFX11-NEXT: s_waitcnt vmcnt(0)
124 ; GFX11-NEXT: v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
125 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
126 ; GFX11-NEXT: v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
127 ; GFX11-NEXT: v_add_f32_e32 v2, v10, v2
128 ; GFX11-NEXT: ; return to shader part epilog
130 %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
131 %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
132 %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
133 %d.3 = fadd <4 x float> %d.0, %d.1
134 %data = fadd <4 x float> %d.2, %d.3
135 ret <4 x float> %data
; buffer_load_voffset_large_12bit: the second i32 operand is the constant 4092.
; All targets are expected to encode it entirely in the instruction as
; offset:4092, needing only the zeroed index VGPR (idxen, no offen).
138 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
139 ; GFX6-LABEL: buffer_load_voffset_large_12bit:
140 ; GFX6: ; %bb.0: ; %main_body
141 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
143 ; GFX6-NEXT: s_waitcnt vmcnt(0)
144 ; GFX6-NEXT: ; return to shader part epilog
146 ; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
147 ; GFX8PLUS: ; %bb.0: ; %main_body
148 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
149 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
150 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
151 ; GFX8PLUS-NEXT: ; return to shader part epilog
153 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
154 ; GFX11: ; %bb.0: ; %main_body
155 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
156 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
157 ; GFX11-NEXT: s_waitcnt vmcnt(0)
158 ; GFX11-NEXT: ; return to shader part epilog
160 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
161 ret <4 x float> %data
; buffer_load_voffset_large_13bit: the second i32 operand is the constant 8188
; (0x1000 + 4092). The expected code splits it: 0x1000 is materialized in v1
; and 4092 stays in the instruction as offset:4092. The zero index reaches v0
; through s4, and the load uses the v[0:1] pair with idxen offen.
164 define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
165 ; GFX6-LABEL: buffer_load_voffset_large_13bit:
166 ; GFX6: ; %bb.0: ; %main_body
167 ; GFX6-NEXT: s_mov_b32 s4, 0
168 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x1000
169 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
170 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
171 ; GFX6-NEXT: s_waitcnt vmcnt(0)
172 ; GFX6-NEXT: ; return to shader part epilog
174 ; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
175 ; GFX8PLUS: ; %bb.0: ; %main_body
176 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
177 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x1000
178 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
179 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
180 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
181 ; GFX8PLUS-NEXT: ; return to shader part epilog
183 ; GFX11-LABEL: buffer_load_voffset_large_13bit:
184 ; GFX11: ; %bb.0: ; %main_body
185 ; GFX11-NEXT: s_mov_b32 s4, 0
186 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
187 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
188 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
189 ; GFX11-NEXT: s_waitcnt vmcnt(0)
190 ; GFX11-NEXT: ; return to shader part epilog
192 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
193 ret <4 x float> %data
; buffer_load_voffset_large_16bit: the second i32 operand is the constant
; 65532 (0xf000 + 4092). The expected code splits it: 0xf000 is materialized
; in v1 and 4092 remains as offset:4092, with the load using v[0:1] and
; idxen offen.
196 define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
197 ; GFX6-LABEL: buffer_load_voffset_large_16bit:
198 ; GFX6: ; %bb.0: ; %main_body
199 ; GFX6-NEXT: s_mov_b32 s4, 0
200 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xf000
201 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
202 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
203 ; GFX6-NEXT: s_waitcnt vmcnt(0)
204 ; GFX6-NEXT: ; return to shader part epilog
206 ; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
207 ; GFX8PLUS: ; %bb.0: ; %main_body
208 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
209 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xf000
210 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
211 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
212 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
213 ; GFX8PLUS-NEXT: ; return to shader part epilog
215 ; GFX11-LABEL: buffer_load_voffset_large_16bit:
216 ; GFX11: ; %bb.0: ; %main_body
217 ; GFX11-NEXT: s_mov_b32 s4, 0
218 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
219 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
220 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
221 ; GFX11-NEXT: s_waitcnt vmcnt(0)
222 ; GFX11-NEXT: ; return to shader part epilog
224 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
225 ret <4 x float> %data
; buffer_load_voffset_large_23bit: the second i32 operand is the constant
; 8388604 (0x7ff000 + 4092). The expected code splits it: 0x7ff000 is
; materialized in v1 and 4092 remains as offset:4092, with the load using
; v[0:1] and idxen offen.
228 define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
229 ; GFX6-LABEL: buffer_load_voffset_large_23bit:
230 ; GFX6: ; %bb.0: ; %main_body
231 ; GFX6-NEXT: s_mov_b32 s4, 0
232 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7ff000
233 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
234 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
235 ; GFX6-NEXT: s_waitcnt vmcnt(0)
236 ; GFX6-NEXT: ; return to shader part epilog
238 ; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
239 ; GFX8PLUS: ; %bb.0: ; %main_body
240 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
241 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x7ff000
242 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
243 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
244 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
245 ; GFX8PLUS-NEXT: ; return to shader part epilog
247 ; GFX11-LABEL: buffer_load_voffset_large_23bit:
248 ; GFX11: ; %bb.0: ; %main_body
249 ; GFX11-NEXT: s_mov_b32 s4, 0
250 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
251 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
252 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
253 ; GFX11-NEXT: s_waitcnt vmcnt(0)
254 ; GFX11-NEXT: ; return to shader part epilog
256 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
257 ret <4 x float> %data
; buffer_load_voffset_large_24bit: the second i32 operand is the constant
; 16777212 (0xfff000 + 4092). The expected code splits it: 0xfff000 is
; materialized in v1 and 4092 remains as offset:4092, with the load using
; v[0:1] and idxen offen.
260 define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
261 ; GFX6-LABEL: buffer_load_voffset_large_24bit:
262 ; GFX6: ; %bb.0: ; %main_body
263 ; GFX6-NEXT: s_mov_b32 s4, 0
264 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xfff000
265 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
266 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
267 ; GFX6-NEXT: s_waitcnt vmcnt(0)
268 ; GFX6-NEXT: ; return to shader part epilog
270 ; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
271 ; GFX8PLUS: ; %bb.0: ; %main_body
272 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
273 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xfff000
274 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
275 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
276 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
277 ; GFX8PLUS-NEXT: ; return to shader part epilog
279 ; GFX11-LABEL: buffer_load_voffset_large_24bit:
280 ; GFX11: ; %bb.0: ; %main_body
281 ; GFX11-NEXT: s_mov_b32 s4, 0
282 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
283 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
284 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
285 ; GFX11-NEXT: s_waitcnt vmcnt(0)
286 ; GFX11-NEXT: ; return to shader part epilog
288 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
289 ret <4 x float> %data
; buffer_load_idx: the first i32 operand is the non-constant argument %1 and
; the remaining i32 operands are 0. The expected code uses the incoming v0
; directly as the index (idxen only), with no extra moves.
292 define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
293 ; GFX6-LABEL: buffer_load_idx:
294 ; GFX6: ; %bb.0: ; %main_body
295 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
296 ; GFX6-NEXT: s_waitcnt vmcnt(0)
297 ; GFX6-NEXT: ; return to shader part epilog
299 ; GFX8PLUS-LABEL: buffer_load_idx:
300 ; GFX8PLUS: ; %bb.0: ; %main_body
301 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
302 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
303 ; GFX8PLUS-NEXT: ; return to shader part epilog
305 ; GFX11-LABEL: buffer_load_idx:
306 ; GFX11: ; %bb.0: ; %main_body
307 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
308 ; GFX11-NEXT: s_waitcnt vmcnt(0)
309 ; GFX11-NEXT: ; return to shader part epilog
311 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
312 ret <4 x float> %data
; buffer_load_ofs: the first i32 operand is the constant 0 and the second is
; the non-constant argument %1. The expected code moves the incoming v0 into
; v1, puts zero into v0 via s4, and issues the load on the v[0:1] pair with
; idxen offen.
315 define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
316 ; GFX6-LABEL: buffer_load_ofs:
317 ; GFX6: ; %bb.0: ; %main_body
318 ; GFX6-NEXT: s_mov_b32 s4, 0
319 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
320 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
321 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
322 ; GFX6-NEXT: s_waitcnt vmcnt(0)
323 ; GFX6-NEXT: ; return to shader part epilog
325 ; GFX8PLUS-LABEL: buffer_load_ofs:
326 ; GFX8PLUS: ; %bb.0: ; %main_body
327 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
328 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, v0
329 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
330 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
331 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
332 ; GFX8PLUS-NEXT: ; return to shader part epilog
334 ; GFX11-LABEL: buffer_load_ofs:
335 ; GFX11: ; %bb.0: ; %main_body
336 ; GFX11-NEXT: s_mov_b32 s4, 0
337 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
338 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
339 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
340 ; GFX11-NEXT: s_waitcnt vmcnt(0)
341 ; GFX11-NEXT: ; return to shader part epilog
343 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
344 ret <4 x float> %data
; buffer_load_ofs_imm: like buffer_load_ofs, but the second i32 operand is
; %1 + 60. The expected code emits no separate add: the constant 60 is folded
; into the load as offset:60 while %1 is still supplied through v1 (idxen
; offen on v[0:1]).
347 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
348 ; GFX6-LABEL: buffer_load_ofs_imm:
349 ; GFX6: ; %bb.0: ; %main_body
350 ; GFX6-NEXT: s_mov_b32 s4, 0
351 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
352 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
353 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
354 ; GFX6-NEXT: s_waitcnt vmcnt(0)
355 ; GFX6-NEXT: ; return to shader part epilog
357 ; GFX8PLUS-LABEL: buffer_load_ofs_imm:
358 ; GFX8PLUS: ; %bb.0: ; %main_body
359 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
360 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, v0
361 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
362 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
363 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
364 ; GFX8PLUS-NEXT: ; return to shader part epilog
366 ; GFX11-LABEL: buffer_load_ofs_imm:
367 ; GFX11: ; %bb.0: ; %main_body
368 ; GFX11-NEXT: s_mov_b32 s4, 0
369 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
370 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
371 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
372 ; GFX11-NEXT: s_waitcnt vmcnt(0)
373 ; GFX11-NEXT: ; return to shader part epilog
375 %ofs = add i32 %1, 60
376 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
377 ret <4 x float> %data
; buffer_load_both: both the first and second i32 operands are non-constant
; (%1 and %2, in argument order). The expected code uses the incoming v[0:1]
; pair unchanged with idxen offen, i.e. no register shuffling is needed.
380 define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
381 ; GFX6-LABEL: buffer_load_both:
382 ; GFX6: ; %bb.0: ; %main_body
383 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
384 ; GFX6-NEXT: s_waitcnt vmcnt(0)
385 ; GFX6-NEXT: ; return to shader part epilog
387 ; GFX8PLUS-LABEL: buffer_load_both:
388 ; GFX8PLUS: ; %bb.0: ; %main_body
389 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
390 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
391 ; GFX8PLUS-NEXT: ; return to shader part epilog
393 ; GFX11-LABEL: buffer_load_both:
394 ; GFX11: ; %bb.0: ; %main_body
395 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
396 ; GFX11-NEXT: s_waitcnt vmcnt(0)
397 ; GFX11-NEXT: ; return to shader part epilog
399 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
400 ret <4 x float> %data
; buffer_load_both_reversed: same as buffer_load_both but with the two
; arguments swapped in the call (%2 first, %1 second). The expected code
; copies v0 into v2 so the operands form the consecutive pair v[1:2] in the
; required order, then loads with idxen offen.
403 define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
404 ; GFX6-LABEL: buffer_load_both_reversed:
405 ; GFX6: ; %bb.0: ; %main_body
406 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
407 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
408 ; GFX6-NEXT: s_waitcnt vmcnt(0)
409 ; GFX6-NEXT: ; return to shader part epilog
411 ; GFX8PLUS-LABEL: buffer_load_both_reversed:
412 ; GFX8PLUS: ; %bb.0: ; %main_body
413 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, v0
414 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
415 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
416 ; GFX8PLUS-NEXT: ; return to shader part epilog
418 ; GFX11-LABEL: buffer_load_both_reversed:
419 ; GFX11: ; %bb.0: ; %main_body
420 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
421 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
422 ; GFX11-NEXT: s_waitcnt vmcnt(0)
423 ; GFX11-NEXT: ; return to shader part epilog
425 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
426 ret <4 x float> %data
; buffer_load_x: scalar float variant of the format load (the .f32 overload)
; with all i32 operands 0. The expected instruction is the single-component
; buffer_load_format_x writing one VGPR (v0), idxen with a zeroed index.
429 define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
430 ; GFX6-LABEL: buffer_load_x:
431 ; GFX6: ; %bb.0: ; %main_body
432 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
433 ; GFX6-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
434 ; GFX6-NEXT: s_waitcnt vmcnt(0)
435 ; GFX6-NEXT: ; return to shader part epilog
437 ; GFX8PLUS-LABEL: buffer_load_x:
438 ; GFX8PLUS: ; %bb.0: ; %main_body
439 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
440 ; GFX8PLUS-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
441 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
442 ; GFX8PLUS-NEXT: ; return to shader part epilog
444 ; GFX11-LABEL: buffer_load_x:
445 ; GFX11: ; %bb.0: ; %main_body
446 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
447 ; GFX11-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
448 ; GFX11-NEXT: s_waitcnt vmcnt(0)
449 ; GFX11-NEXT: ; return to shader part epilog
451 %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; buffer_load_x_i32: integer variant of buffer_load_x (the .i32 overload); the
; loaded i32 is bitcast to float (%fdata). The expected machine code is
; identical to the float case: one buffer_load_format_x into v0 with idxen.
455 define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
456 ; GFX6-LABEL: buffer_load_x_i32:
457 ; GFX6: ; %bb.0: ; %main_body
458 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
459 ; GFX6-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
460 ; GFX6-NEXT: s_waitcnt vmcnt(0)
461 ; GFX6-NEXT: ; return to shader part epilog
463 ; GFX8PLUS-LABEL: buffer_load_x_i32:
464 ; GFX8PLUS: ; %bb.0: ; %main_body
465 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
466 ; GFX8PLUS-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
467 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
468 ; GFX8PLUS-NEXT: ; return to shader part epilog
470 ; GFX11-LABEL: buffer_load_x_i32:
471 ; GFX11: ; %bb.0: ; %main_body
472 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
473 ; GFX11-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
474 ; GFX11-NEXT: s_waitcnt vmcnt(0)
475 ; GFX11-NEXT: ; return to shader part epilog
477 %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
478 %fdata = bitcast i32 %data to float
; buffer_load_xy: two-component variant (the .v2f32 overload) with all i32
; operands 0. The expected instruction is buffer_load_format_xy writing the
; register pair v[0:1], idxen with a zeroed index.
482 define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
483 ; GFX6-LABEL: buffer_load_xy:
484 ; GFX6: ; %bb.0: ; %main_body
485 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
486 ; GFX6-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
487 ; GFX6-NEXT: s_waitcnt vmcnt(0)
488 ; GFX6-NEXT: ; return to shader part epilog
490 ; GFX8PLUS-LABEL: buffer_load_xy:
491 ; GFX8PLUS: ; %bb.0: ; %main_body
492 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
493 ; GFX8PLUS-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
494 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
495 ; GFX8PLUS-NEXT: ; return to shader part epilog
497 ; GFX11-LABEL: buffer_load_xy:
498 ; GFX11: ; %bb.0: ; %main_body
499 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
500 ; GFX11-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
501 ; GFX11-NEXT: s_waitcnt vmcnt(0)
502 ; GFX11-NEXT: ; return to shader part epilog
504 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
505 ret <2 x float> %data
; buffer_load_v4i32_tfe: format load returning the struct { <4 x i32>, i32 }.
; Element 0 (the data) is stored to %out; element 1 (%status) is bitcast to
; float (%fstatus). The expected load carries the tfe modifier and writes five
; consecutive VGPRs v[2:6]: v[2:5] are stored and v6 is moved to v0. The store
; form differs per target (buffer_store with addr64 on verde, flat_store on
; tonga, global_store on gfx1100).
508 define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
509 ; GFX6-LABEL: buffer_load_v4i32_tfe:
511 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
512 ; GFX6-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
513 ; GFX6-NEXT: s_mov_b32 s2, 0
514 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
515 ; GFX6-NEXT: s_mov_b32 s0, s2
516 ; GFX6-NEXT: s_mov_b32 s1, s2
517 ; GFX6-NEXT: s_waitcnt vmcnt(0)
518 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
519 ; GFX6-NEXT: v_mov_b32_e32 v0, v6
520 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
521 ; GFX6-NEXT: ; return to shader part epilog
523 ; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
525 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
526 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
527 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
528 ; GFX8PLUS-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
529 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v6
530 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
531 ; GFX8PLUS-NEXT: ; return to shader part epilog
533 ; GFX11-LABEL: buffer_load_v4i32_tfe:
535 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
536 ; GFX11-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
537 ; GFX11-NEXT: s_waitcnt vmcnt(0)
538 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
539 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
540 ; GFX11-NEXT: ; return to shader part epilog
541 %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
542 %data = extractvalue { <4 x i32>, i32 } %load, 0
543 store <4 x i32> %data, ptr addrspace(1) %out
544 %status = extractvalue { <4 x i32>, i32 } %load, 1
545 %fstatus = bitcast i32 %status to float
; buffer_load_v4f32_tfe: float counterpart of buffer_load_v4i32_tfe, returning
; { <4 x float>, i32 }. The data element is stored to %out and the i32 status
; element is bitcast to float. The expected machine code matches the integer
; variant: a tfe load into v[2:6], a four-dword store of v[2:5], and v6 moved
; to v0.
549 define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
550 ; GFX6-LABEL: buffer_load_v4f32_tfe:
552 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
553 ; GFX6-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
554 ; GFX6-NEXT: s_mov_b32 s2, 0
555 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
556 ; GFX6-NEXT: s_mov_b32 s0, s2
557 ; GFX6-NEXT: s_mov_b32 s1, s2
558 ; GFX6-NEXT: s_waitcnt vmcnt(0)
559 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
560 ; GFX6-NEXT: v_mov_b32_e32 v0, v6
561 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
562 ; GFX6-NEXT: ; return to shader part epilog
564 ; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
566 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
567 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
568 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
569 ; GFX8PLUS-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
570 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v6
571 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
572 ; GFX8PLUS-NEXT: ; return to shader part epilog
574 ; GFX11-LABEL: buffer_load_v4f32_tfe:
576 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
577 ; GFX11-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
578 ; GFX11-NEXT: s_waitcnt vmcnt(0)
579 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
580 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
581 ; GFX11-NEXT: ; return to shader part epilog
582 %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
583 %data = extractvalue { <4 x float>, i32 } %load, 0
584 store <4 x float> %data, ptr addrspace(1) %out
585 %status = extractvalue { <4 x float>, i32 } %load, 1
586 %fstatus = bitcast i32 %status to float
; buffer_load_v3i32_tfe: three-component tfe load returning
; { <3 x i32>, i32 }. The data element is stored to %out and the status
; element is bitcast to float. The expected load is buffer_load_format_xyz
; with tfe into v[2:5], with v5 moved to v0. For verde the store is expected
; to be split into a dword store of v4 at offset:8 plus a dwordx2 store of
; v[2:3]; the other targets expect a single three-dword store of v[2:4].
590 define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
591 ; GFX6-LABEL: buffer_load_v3i32_tfe:
593 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
594 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
595 ; GFX6-NEXT: s_mov_b32 s2, 0
596 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
597 ; GFX6-NEXT: s_mov_b32 s0, s2
598 ; GFX6-NEXT: s_mov_b32 s1, s2
599 ; GFX6-NEXT: s_waitcnt vmcnt(0)
600 ; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
601 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
602 ; GFX6-NEXT: v_mov_b32_e32 v0, v5
603 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
604 ; GFX6-NEXT: ; return to shader part epilog
606 ; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
608 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
609 ; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
610 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
611 ; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4]
612 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5
613 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
614 ; GFX8PLUS-NEXT: ; return to shader part epilog
616 ; GFX11-LABEL: buffer_load_v3i32_tfe:
618 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
619 ; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
620 ; GFX11-NEXT: s_waitcnt vmcnt(0)
621 ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
622 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
623 ; GFX11-NEXT: ; return to shader part epilog
624 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
625 %data = extractvalue { <3 x i32>, i32 } %load, 0
626 store <3 x i32> %data, ptr addrspace(1) %out
627 %status = extractvalue { <3 x i32>, i32 } %load, 1
628 %fstatus = bitcast i32 %status to float
; buffer_load_v3f32_tfe: float counterpart of buffer_load_v3i32_tfe, returning
; { <3 x float>, i32 }. The expected machine code matches the integer
; variant: buffer_load_format_xyz with tfe into v[2:5], data stored from
; v[2:4] (split into dword + dwordx2 stores for verde), and v5 moved to v0.
632 define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
633 ; GFX6-LABEL: buffer_load_v3f32_tfe:
635 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
636 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
637 ; GFX6-NEXT: s_mov_b32 s2, 0
638 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
639 ; GFX6-NEXT: s_mov_b32 s0, s2
640 ; GFX6-NEXT: s_mov_b32 s1, s2
641 ; GFX6-NEXT: s_waitcnt vmcnt(0)
642 ; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
643 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
644 ; GFX6-NEXT: v_mov_b32_e32 v0, v5
645 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
646 ; GFX6-NEXT: ; return to shader part epilog
648 ; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
650 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
651 ; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
652 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
653 ; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4]
654 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5
655 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
656 ; GFX8PLUS-NEXT: ; return to shader part epilog
658 ; GFX11-LABEL: buffer_load_v3f32_tfe:
660 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
661 ; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
662 ; GFX11-NEXT: s_waitcnt vmcnt(0)
663 ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
664 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
665 ; GFX11-NEXT: ; return to shader part epilog
666 %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
667 %data = extractvalue { <3 x float>, i32 } %load, 0
668 store <3 x float> %data, ptr addrspace(1) %out
669 %status = extractvalue { <3 x float>, i32 } %load, 1
670 %fstatus = bitcast i32 %status to float
; buffer_load_v2i32_tfe: two-component tfe load returning { <2 x i32>, i32 }.
; The data element is stored to %out as two dwords from v[2:3] and the status
; element is bitcast to float; every target expects the value moved to v0 to
; come from v4. For tonga and gfx1100 the expected load is
; buffer_load_format_xy with tfe into v[2:4].
; NOTE(review): the verde expectation instead uses buffer_load_format_xyz into
; v[2:5] while still taking the returned value from v4. This differs from the
; _xy v[2:4] form of the other two targets; confirm it is the intended llc
; output for verde before relying on it.
674 define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
675 ; GFX6-LABEL: buffer_load_v2i32_tfe:
677 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
678 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
679 ; GFX6-NEXT: s_mov_b32 s2, 0
680 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
681 ; GFX6-NEXT: s_mov_b32 s0, s2
682 ; GFX6-NEXT: s_mov_b32 s1, s2
683 ; GFX6-NEXT: s_waitcnt vmcnt(0)
684 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
685 ; GFX6-NEXT: v_mov_b32_e32 v0, v4
686 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
687 ; GFX6-NEXT: ; return to shader part epilog
689 ; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
691 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
692 ; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
693 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
694 ; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
695 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4
696 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
697 ; GFX8PLUS-NEXT: ; return to shader part epilog
699 ; GFX11-LABEL: buffer_load_v2i32_tfe:
701 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
702 ; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
703 ; GFX11-NEXT: s_waitcnt vmcnt(0)
704 ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
705 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
706 ; GFX11-NEXT: ; return to shader part epilog
707 %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
708 %data = extractvalue { <2 x i32>, i32 } %load, 0
709 store <2 x i32> %data, ptr addrspace(1) %out
710 %status = extractvalue { <2 x i32>, i32 } %load, 1
711 %fstatus = bitcast i32 %status to float
; buffer_load_v2f32_tfe: float counterpart of buffer_load_v2i32_tfe, returning
; { <2 x float>, i32 }. The data element is stored to %out as two dwords from
; v[2:3] and the status element is bitcast to float; every target expects the
; value moved to v0 to come from v4. For tonga and gfx1100 the expected load
; is buffer_load_format_xy with tfe into v[2:4].
; NOTE(review): the verde expectation instead uses buffer_load_format_xyz into
; v[2:5] while still taking the returned value from v4. This differs from the
; _xy v[2:4] form of the other two targets; confirm it is the intended llc
; output for verde before relying on it.
715 define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
716 ; GFX6-LABEL: buffer_load_v2f32_tfe:
718 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
719 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
720 ; GFX6-NEXT: s_mov_b32 s2, 0
721 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
722 ; GFX6-NEXT: s_mov_b32 s0, s2
723 ; GFX6-NEXT: s_mov_b32 s1, s2
724 ; GFX6-NEXT: s_waitcnt vmcnt(0)
725 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
726 ; GFX6-NEXT: v_mov_b32_e32 v0, v4
727 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
728 ; GFX6-NEXT: ; return to shader part epilog
730 ; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
732 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
733 ; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
734 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
735 ; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
736 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4
737 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
738 ; GFX8PLUS-NEXT: ; return to shader part epilog
740 ; GFX11-LABEL: buffer_load_v2f32_tfe:
742 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
743 ; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
744 ; GFX11-NEXT: s_waitcnt vmcnt(0)
745 ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
746 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
747 ; GFX11-NEXT: ; return to shader part epilog
748 %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
749 %data = extractvalue { <2 x float>, i32 } %load, 0
750 store <2 x float> %data, ptr addrspace(1) %out
751 %status = extractvalue { <2 x float>, i32 } %load, 1
752 %fstatus = bitcast i32 %status to float
; Scalar i32 case of the struct-returning format load (sl_i32i32s overload).
; The first struct member is stored to %out and the second (i32) member is
; bitcast to float.
; All three targets are expected to select buffer_load_format_x with the tfe
; modifier into a register pair (v[2:3]): v2 is the stored value and v3 is
; copied into v0. Only the store differs per target
; (buffer_store_dword addr64 / flat_store_dword / global_store_b32).
; NOTE(review): presumably the second struct member is the TFE status dword;
; confirm against the intrinsic documentation.
756 define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
757 ; GFX6-LABEL: buffer_load_i32_tfe:
759 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
760 ; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
761 ; GFX6-NEXT: s_mov_b32 s2, 0
762 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
763 ; GFX6-NEXT: s_mov_b32 s0, s2
764 ; GFX6-NEXT: s_mov_b32 s1, s2
765 ; GFX6-NEXT: s_waitcnt vmcnt(0)
766 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
767 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
768 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
769 ; GFX6-NEXT: ; return to shader part epilog
771 ; GFX8PLUS-LABEL: buffer_load_i32_tfe:
773 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
774 ; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
775 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
776 ; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2
777 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3
778 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
779 ; GFX8PLUS-NEXT: ; return to shader part epilog
781 ; GFX11-LABEL: buffer_load_i32_tfe:
783 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
784 ; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
785 ; GFX11-NEXT: s_waitcnt vmcnt(0)
786 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
787 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
788 ; GFX11-NEXT: ; return to shader part epilog
789 %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
790 %data = extractvalue { i32, i32 } %load, 0
791 store i32 %data, ptr addrspace(1) %out
792 %status = extractvalue { i32, i32 } %load, 1
793 %fstatus = bitcast i32 %status to float
; Scalar float case of the struct-returning format load (sl_f32i32s overload).
; The first struct member is stored to %out and the second (i32) member is
; bitcast to float.
; The expected instruction sequences are the same shape as for the i32 variant:
; buffer_load_format_x with the tfe modifier into v[2:3], v2 stored, and v3
; copied into v0, with a target-specific store instruction.
; NOTE(review): presumably the second struct member is the TFE status dword;
; confirm against the intrinsic documentation.
797 define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
798 ; GFX6-LABEL: buffer_load_f32_tfe:
800 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
801 ; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
802 ; GFX6-NEXT: s_mov_b32 s2, 0
803 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
804 ; GFX6-NEXT: s_mov_b32 s0, s2
805 ; GFX6-NEXT: s_mov_b32 s1, s2
806 ; GFX6-NEXT: s_waitcnt vmcnt(0)
807 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
808 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
809 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
810 ; GFX6-NEXT: ; return to shader part epilog
812 ; GFX8PLUS-LABEL: buffer_load_f32_tfe:
814 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
815 ; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
816 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
817 ; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2
818 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3
819 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
820 ; GFX8PLUS-NEXT: ; return to shader part epilog
822 ; GFX11-LABEL: buffer_load_f32_tfe:
824 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
825 ; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
826 ; GFX11-NEXT: s_waitcnt vmcnt(0)
827 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
828 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
829 ; GFX11-NEXT: ; return to shader part epilog
830 %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
831 %data = extractvalue { float, i32 } %load, 0
832 store float %data, ptr addrspace(1) %out
833 %status = extractvalue { float, i32 } %load, 1
834 %fstatus = bitcast i32 %status to float
; Declarations of the struct.ptr.buffer.load.format overloads used by this test.
; Each takes a ptr addrspace(8) operand followed by four i32 operands.

; Overloads that return only the loaded value.
838 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
839 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
840 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
841 declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
; Overloads that return a { data, i32 } struct (the "sl_..." name suffix); the
; final i32 operand is declared immarg on these.
842 declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
843 declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
844 declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
845 declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
846 declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
847 declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
848 declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
849 declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
; Attribute group referenced as #0 by every declaration above.
850 attributes #0 = { nounwind readonly }