1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
4 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
5 ;RUN: llc < %s -mtriple=amdgcn -mattr=-enable-prt-strict-null -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=NOPRT %s
; buffer_load: three struct-ptr format loads of <4 x float> from the same
; resource that differ only in the last argument of the intrinsic (0, 1, 2).
; The expected output shows these values select no modifier, glc and slc
; respectively. Every load uses idxen with a zeroed index register, and the
; gfx1100 runs group the three loads under a single s_clause.
; The returned aggregate is seeded with poison (not the deprecated undef);
; all three fields are overwritten before the return, so the placeholder
; value never influences the generated code.
7 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
8 ; GFX6-LABEL: buffer_load:
9 ; GFX6: ; %bb.0: ; %main_body
10 ; GFX6-NEXT: v_mov_b32_e32 v8, 0
11 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
12 ; GFX6-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
13 ; GFX6-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
14 ; GFX6-NEXT: s_waitcnt vmcnt(0)
15 ; GFX6-NEXT: ; return to shader part epilog
17 ; GFX8PLUS-LABEL: buffer_load:
18 ; GFX8PLUS: ; %bb.0: ; %main_body
19 ; GFX8PLUS-NEXT: v_mov_b32_e32 v8, 0
20 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
21 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
22 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
23 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
24 ; GFX8PLUS-NEXT: ; return to shader part epilog
26 ; GFX11-LABEL: buffer_load:
27 ; GFX11: ; %bb.0: ; %main_body
28 ; GFX11-NEXT: v_mov_b32_e32 v8, 0
29 ; GFX11-NEXT: s_clause 0x2
30 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
31 ; GFX11-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
32 ; GFX11-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
33 ; GFX11-NEXT: s_waitcnt vmcnt(0)
34 ; GFX11-NEXT: ; return to shader part epilog
36 ; NOPRT-LABEL: buffer_load:
37 ; NOPRT: ; %bb.0: ; %main_body
38 ; NOPRT-NEXT: v_mov_b32_e32 v8, 0
39 ; NOPRT-NEXT: s_clause 0x2
40 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
41 ; NOPRT-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
42 ; NOPRT-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
43 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
44 ; NOPRT-NEXT: ; return to shader part epilog
46 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
47 %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
48 %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
49 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
50 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
51 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
52 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; buffer_load_immoffs: a constant 42 passed as the third (offset) argument of
; the intrinsic. The expected output on every run folds it into the
; instruction's immediate field (offset:42); only a zeroed index register is
; materialized for idxen.
55 define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
56 ; GFX6-LABEL: buffer_load_immoffs:
57 ; GFX6: ; %bb.0: ; %main_body
58 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
59 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
60 ; GFX6-NEXT: s_waitcnt vmcnt(0)
61 ; GFX6-NEXT: ; return to shader part epilog
63 ; GFX8PLUS-LABEL: buffer_load_immoffs:
64 ; GFX8PLUS: ; %bb.0: ; %main_body
65 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
66 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
67 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
68 ; GFX8PLUS-NEXT: ; return to shader part epilog
70 ; GFX11-LABEL: buffer_load_immoffs:
71 ; GFX11: ; %bb.0: ; %main_body
72 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
73 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
74 ; GFX11-NEXT: s_waitcnt vmcnt(0)
75 ; GFX11-NEXT: ; return to shader part epilog
77 ; NOPRT-LABEL: buffer_load_immoffs:
78 ; NOPRT: ; %bb.0: ; %main_body
79 ; NOPRT-NEXT: v_mov_b32_e32 v0, 0
80 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
81 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
82 ; NOPRT-NEXT: ; return to shader part epilog
84 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
; buffer_load_immoffs_large: three loads combining a constant third (offset)
; argument with a constant fourth argument, whose results are summed with
; fadd. Per the expected output:
;   - (4092, 60):    60 is emitted inline in the scalar offset position and
;                    4092 becomes offset:4092.
;   - (4092, 32764): 32764 (0x7ffc) is first moved into s4 with s_movk_i32.
;   - (4, 36860):    36860 (0x8ffc) is moved into s4 with s_mov_b32 and the
;                    immediate is offset:4.
; verde/tonga issue all three loads back to back; the gfx1100 runs clause the
; first two and interleave the third with the additions.
88 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
89 ; GFX6-LABEL: buffer_load_immoffs_large:
90 ; GFX6: ; %bb.0: ; %main_body
91 ; GFX6-NEXT: v_mov_b32_e32 v8, 0
92 ; GFX6-NEXT: s_movk_i32 s4, 0x7ffc
93 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
94 ; GFX6-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
95 ; GFX6-NEXT: s_mov_b32 s4, 0x8ffc
96 ; GFX6-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
97 ; GFX6-NEXT: s_waitcnt vmcnt(1)
98 ; GFX6-NEXT: v_add_f32_e32 v3, v3, v7
99 ; GFX6-NEXT: v_add_f32_e32 v2, v2, v6
100 ; GFX6-NEXT: v_add_f32_e32 v1, v1, v5
101 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v4
102 ; GFX6-NEXT: s_waitcnt vmcnt(0)
103 ; GFX6-NEXT: v_add_f32_e32 v0, v8, v0
104 ; GFX6-NEXT: v_add_f32_e32 v1, v9, v1
105 ; GFX6-NEXT: v_add_f32_e32 v2, v10, v2
106 ; GFX6-NEXT: v_add_f32_e32 v3, v11, v3
107 ; GFX6-NEXT: ; return to shader part epilog
109 ; GFX8PLUS-LABEL: buffer_load_immoffs_large:
110 ; GFX8PLUS: ; %bb.0: ; %main_body
111 ; GFX8PLUS-NEXT: v_mov_b32_e32 v8, 0
112 ; GFX8PLUS-NEXT: s_movk_i32 s4, 0x7ffc
113 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
114 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
115 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0x8ffc
116 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
117 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(1)
118 ; GFX8PLUS-NEXT: v_add_f32_e32 v3, v3, v7
119 ; GFX8PLUS-NEXT: v_add_f32_e32 v2, v2, v6
120 ; GFX8PLUS-NEXT: v_add_f32_e32 v1, v1, v5
121 ; GFX8PLUS-NEXT: v_add_f32_e32 v0, v0, v4
122 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
123 ; GFX8PLUS-NEXT: v_add_f32_e32 v0, v8, v0
124 ; GFX8PLUS-NEXT: v_add_f32_e32 v1, v9, v1
125 ; GFX8PLUS-NEXT: v_add_f32_e32 v2, v10, v2
126 ; GFX8PLUS-NEXT: v_add_f32_e32 v3, v11, v3
127 ; GFX8PLUS-NEXT: ; return to shader part epilog
129 ; GFX11-LABEL: buffer_load_immoffs_large:
130 ; GFX11: ; %bb.0: ; %main_body
131 ; GFX11-NEXT: v_mov_b32_e32 v8, 0
132 ; GFX11-NEXT: s_movk_i32 s4, 0x7ffc
133 ; GFX11-NEXT: s_clause 0x1
134 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
135 ; GFX11-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
136 ; GFX11-NEXT: s_mov_b32 s4, 0x8ffc
137 ; GFX11-NEXT: s_waitcnt vmcnt(0)
138 ; GFX11-NEXT: v_add_f32_e32 v1, v1, v5
139 ; GFX11-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
140 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
141 ; GFX11-NEXT: s_waitcnt vmcnt(0)
142 ; GFX11-NEXT: v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
143 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
144 ; GFX11-NEXT: v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
145 ; GFX11-NEXT: v_add_f32_e32 v2, v10, v2
146 ; GFX11-NEXT: ; return to shader part epilog
148 ; NOPRT-LABEL: buffer_load_immoffs_large:
149 ; NOPRT: ; %bb.0: ; %main_body
150 ; NOPRT-NEXT: v_mov_b32_e32 v8, 0
151 ; NOPRT-NEXT: s_movk_i32 s4, 0x7ffc
152 ; NOPRT-NEXT: s_clause 0x1
153 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
154 ; NOPRT-NEXT: buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
155 ; NOPRT-NEXT: s_mov_b32 s4, 0x8ffc
156 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
157 ; NOPRT-NEXT: v_add_f32_e32 v1, v1, v5
158 ; NOPRT-NEXT: buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
159 ; NOPRT-NEXT: v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
160 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
161 ; NOPRT-NEXT: v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
162 ; NOPRT-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
163 ; NOPRT-NEXT: v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
164 ; NOPRT-NEXT: v_add_f32_e32 v2, v10, v2
165 ; NOPRT-NEXT: ; return to shader part epilog
167 %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
168 %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
169 %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
170 %d.3 = fadd <4 x float> %d.0, %d.1
171 %data = fadd <4 x float> %d.2, %d.3
172 ret <4 x float> %data
; buffer_load_voffset_large_12bit: constant offset 4092 as the third argument.
; The expected output encodes the whole value as offset:4092 on every run, so
; no offset register (offen) is needed; only the zero index is materialized.
175 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
176 ; GFX6-LABEL: buffer_load_voffset_large_12bit:
177 ; GFX6: ; %bb.0: ; %main_body
178 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
179 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
180 ; GFX6-NEXT: s_waitcnt vmcnt(0)
181 ; GFX6-NEXT: ; return to shader part epilog
183 ; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
184 ; GFX8PLUS: ; %bb.0: ; %main_body
185 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
186 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
187 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
188 ; GFX8PLUS-NEXT: ; return to shader part epilog
190 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
191 ; GFX11: ; %bb.0: ; %main_body
192 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
193 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
194 ; GFX11-NEXT: s_waitcnt vmcnt(0)
195 ; GFX11-NEXT: ; return to shader part epilog
197 ; NOPRT-LABEL: buffer_load_voffset_large_12bit:
198 ; NOPRT: ; %bb.0: ; %main_body
199 ; NOPRT-NEXT: v_mov_b32_e32 v0, 0
200 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
201 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
202 ; NOPRT-NEXT: ; return to shader part epilog
204 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
205 ret <4 x float> %data
; buffer_load_voffset_large_13bit: constant offset 8188 as the third argument.
; The expected output splits it as 0x1000 + 4092: 0x1000 goes into the offset
; register (v1, used via offen) and 4092 stays in the immediate field. The
; index half of the register pair (v0) is zero, copied from s4.
208 define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
209 ; GFX6-LABEL: buffer_load_voffset_large_13bit:
210 ; GFX6: ; %bb.0: ; %main_body
211 ; GFX6-NEXT: s_mov_b32 s4, 0
212 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x1000
213 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
214 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
215 ; GFX6-NEXT: s_waitcnt vmcnt(0)
216 ; GFX6-NEXT: ; return to shader part epilog
218 ; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
219 ; GFX8PLUS: ; %bb.0: ; %main_body
220 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
221 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x1000
222 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
223 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
224 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
225 ; GFX8PLUS-NEXT: ; return to shader part epilog
227 ; GFX11-LABEL: buffer_load_voffset_large_13bit:
228 ; GFX11: ; %bb.0: ; %main_body
229 ; GFX11-NEXT: s_mov_b32 s4, 0
230 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
231 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
232 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
233 ; GFX11-NEXT: s_waitcnt vmcnt(0)
234 ; GFX11-NEXT: ; return to shader part epilog
236 ; NOPRT-LABEL: buffer_load_voffset_large_13bit:
237 ; NOPRT: ; %bb.0: ; %main_body
238 ; NOPRT-NEXT: s_mov_b32 s4, 0
239 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
240 ; NOPRT-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
241 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
242 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
243 ; NOPRT-NEXT: ; return to shader part epilog
245 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
246 ret <4 x float> %data
; buffer_load_voffset_large_16bit: constant offset 65532 as the third
; argument. The expected output splits it as 0xf000 + 4092: 0xf000 goes into
; the offset register (v1, offen) and 4092 stays in the immediate field.
249 define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
250 ; GFX6-LABEL: buffer_load_voffset_large_16bit:
251 ; GFX6: ; %bb.0: ; %main_body
252 ; GFX6-NEXT: s_mov_b32 s4, 0
253 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xf000
254 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
255 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
256 ; GFX6-NEXT: s_waitcnt vmcnt(0)
257 ; GFX6-NEXT: ; return to shader part epilog
259 ; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
260 ; GFX8PLUS: ; %bb.0: ; %main_body
261 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
262 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xf000
263 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
264 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
265 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
266 ; GFX8PLUS-NEXT: ; return to shader part epilog
268 ; GFX11-LABEL: buffer_load_voffset_large_16bit:
269 ; GFX11: ; %bb.0: ; %main_body
270 ; GFX11-NEXT: s_mov_b32 s4, 0
271 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
272 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
273 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
274 ; GFX11-NEXT: s_waitcnt vmcnt(0)
275 ; GFX11-NEXT: ; return to shader part epilog
277 ; NOPRT-LABEL: buffer_load_voffset_large_16bit:
278 ; NOPRT: ; %bb.0: ; %main_body
279 ; NOPRT-NEXT: s_mov_b32 s4, 0
280 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281 ; NOPRT-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
282 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
283 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
284 ; NOPRT-NEXT: ; return to shader part epilog
286 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
287 ret <4 x float> %data
; buffer_load_voffset_large_23bit: constant offset 8388604 as the third
; argument. The expected output splits it as 0x7ff000 + 4092: 0x7ff000 goes
; into the offset register (v1, offen) and 4092 stays in the immediate field.
290 define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
291 ; GFX6-LABEL: buffer_load_voffset_large_23bit:
292 ; GFX6: ; %bb.0: ; %main_body
293 ; GFX6-NEXT: s_mov_b32 s4, 0
294 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7ff000
295 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
296 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
297 ; GFX6-NEXT: s_waitcnt vmcnt(0)
298 ; GFX6-NEXT: ; return to shader part epilog
300 ; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
301 ; GFX8PLUS: ; %bb.0: ; %main_body
302 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
303 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x7ff000
304 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
305 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
306 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
307 ; GFX8PLUS-NEXT: ; return to shader part epilog
309 ; GFX11-LABEL: buffer_load_voffset_large_23bit:
310 ; GFX11: ; %bb.0: ; %main_body
311 ; GFX11-NEXT: s_mov_b32 s4, 0
312 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
313 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
314 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
315 ; GFX11-NEXT: s_waitcnt vmcnt(0)
316 ; GFX11-NEXT: ; return to shader part epilog
318 ; NOPRT-LABEL: buffer_load_voffset_large_23bit:
319 ; NOPRT: ; %bb.0: ; %main_body
320 ; NOPRT-NEXT: s_mov_b32 s4, 0
321 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
322 ; NOPRT-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
323 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
324 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
325 ; NOPRT-NEXT: ; return to shader part epilog
327 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
328 ret <4 x float> %data
; buffer_load_voffset_large_24bit: constant offset 16777212 as the third
; argument. The expected output splits it as 0xfff000 + 4092: 0xfff000 goes
; into the offset register (v1, offen) and 4092 stays in the immediate field.
331 define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
332 ; GFX6-LABEL: buffer_load_voffset_large_24bit:
333 ; GFX6: ; %bb.0: ; %main_body
334 ; GFX6-NEXT: s_mov_b32 s4, 0
335 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xfff000
336 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
337 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
338 ; GFX6-NEXT: s_waitcnt vmcnt(0)
339 ; GFX6-NEXT: ; return to shader part epilog
341 ; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
342 ; GFX8PLUS: ; %bb.0: ; %main_body
343 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
344 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xfff000
345 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
346 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
347 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
348 ; GFX8PLUS-NEXT: ; return to shader part epilog
350 ; GFX11-LABEL: buffer_load_voffset_large_24bit:
351 ; GFX11: ; %bb.0: ; %main_body
352 ; GFX11-NEXT: s_mov_b32 s4, 0
353 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
354 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
355 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
356 ; GFX11-NEXT: s_waitcnt vmcnt(0)
357 ; GFX11-NEXT: ; return to shader part epilog
359 ; NOPRT-LABEL: buffer_load_voffset_large_24bit:
360 ; NOPRT: ; %bb.0: ; %main_body
361 ; NOPRT-NEXT: s_mov_b32 s4, 0
362 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
363 ; NOPRT-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
364 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
365 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
366 ; NOPRT-NEXT: ; return to shader part epilog
368 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
369 ret <4 x float> %data
; buffer_load_idx: the i32 function argument is passed as the second (index)
; argument of the intrinsic. The expected output uses the incoming v0 directly
; as the idxen operand, with no extra moves on any run.
372 define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
373 ; GFX6-LABEL: buffer_load_idx:
374 ; GFX6: ; %bb.0: ; %main_body
375 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
376 ; GFX6-NEXT: s_waitcnt vmcnt(0)
377 ; GFX6-NEXT: ; return to shader part epilog
379 ; GFX8PLUS-LABEL: buffer_load_idx:
380 ; GFX8PLUS: ; %bb.0: ; %main_body
381 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
382 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
383 ; GFX8PLUS-NEXT: ; return to shader part epilog
385 ; GFX11-LABEL: buffer_load_idx:
386 ; GFX11: ; %bb.0: ; %main_body
387 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
388 ; GFX11-NEXT: s_waitcnt vmcnt(0)
389 ; GFX11-NEXT: ; return to shader part epilog
391 ; NOPRT-LABEL: buffer_load_idx:
392 ; NOPRT: ; %bb.0: ; %main_body
393 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
394 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
395 ; NOPRT-NEXT: ; return to shader part epilog
397 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
398 ret <4 x float> %data
; buffer_load_ofs: the i32 function argument is passed as the third (offset)
; argument while the index argument is constant 0. The expected output moves
; the incoming v0 into v1 (the offset half of the address pair), writes a zero
; into v0 for the index, and issues the load with both idxen and offen.
401 define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
402 ; GFX6-LABEL: buffer_load_ofs:
403 ; GFX6: ; %bb.0: ; %main_body
404 ; GFX6-NEXT: s_mov_b32 s4, 0
405 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
406 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
407 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
408 ; GFX6-NEXT: s_waitcnt vmcnt(0)
409 ; GFX6-NEXT: ; return to shader part epilog
411 ; GFX8PLUS-LABEL: buffer_load_ofs:
412 ; GFX8PLUS: ; %bb.0: ; %main_body
413 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
414 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, v0
415 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
416 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
417 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
418 ; GFX8PLUS-NEXT: ; return to shader part epilog
420 ; GFX11-LABEL: buffer_load_ofs:
421 ; GFX11: ; %bb.0: ; %main_body
422 ; GFX11-NEXT: s_mov_b32 s4, 0
423 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
424 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
425 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
426 ; GFX11-NEXT: s_waitcnt vmcnt(0)
427 ; GFX11-NEXT: ; return to shader part epilog
429 ; NOPRT-LABEL: buffer_load_ofs:
430 ; NOPRT: ; %bb.0: ; %main_body
431 ; NOPRT-NEXT: s_mov_b32 s4, 0
432 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
433 ; NOPRT-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
434 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
435 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
436 ; NOPRT-NEXT: ; return to shader part epilog
438 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
439 ret <4 x float> %data
; buffer_load_ofs_imm: the offset argument is the i32 function argument plus
; the constant 60. The expected output contains no add instruction: the
; variable part is placed in v1 (offen) and the constant is folded into the
; immediate field as offset:60. The index (v0) is zero.
442 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
443 ; GFX6-LABEL: buffer_load_ofs_imm:
444 ; GFX6: ; %bb.0: ; %main_body
445 ; GFX6-NEXT: s_mov_b32 s4, 0
446 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
447 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
448 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
449 ; GFX6-NEXT: s_waitcnt vmcnt(0)
450 ; GFX6-NEXT: ; return to shader part epilog
452 ; GFX8PLUS-LABEL: buffer_load_ofs_imm:
453 ; GFX8PLUS: ; %bb.0: ; %main_body
454 ; GFX8PLUS-NEXT: s_mov_b32 s4, 0
455 ; GFX8PLUS-NEXT: v_mov_b32_e32 v1, v0
456 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
457 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
458 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
459 ; GFX8PLUS-NEXT: ; return to shader part epilog
461 ; GFX11-LABEL: buffer_load_ofs_imm:
462 ; GFX11: ; %bb.0: ; %main_body
463 ; GFX11-NEXT: s_mov_b32 s4, 0
464 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
465 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
466 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
467 ; GFX11-NEXT: s_waitcnt vmcnt(0)
468 ; GFX11-NEXT: ; return to shader part epilog
470 ; NOPRT-LABEL: buffer_load_ofs_imm:
471 ; NOPRT: ; %bb.0: ; %main_body
472 ; NOPRT-NEXT: s_mov_b32 s4, 0
473 ; NOPRT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
474 ; NOPRT-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
475 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
476 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
477 ; NOPRT-NEXT: ; return to shader part epilog
479 %ofs = add i32 %1, 60
480 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
481 ret <4 x float> %data
; buffer_load_both: the first i32 argument is the index and the second is the
; offset, matching the order in which they arrive in v0 and v1. The expected
; output therefore uses v[0:1] directly with idxen offen and needs no copies.
484 define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
485 ; GFX6-LABEL: buffer_load_both:
486 ; GFX6: ; %bb.0: ; %main_body
487 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
488 ; GFX6-NEXT: s_waitcnt vmcnt(0)
489 ; GFX6-NEXT: ; return to shader part epilog
491 ; GFX8PLUS-LABEL: buffer_load_both:
492 ; GFX8PLUS: ; %bb.0: ; %main_body
493 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
494 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
495 ; GFX8PLUS-NEXT: ; return to shader part epilog
497 ; GFX11-LABEL: buffer_load_both:
498 ; GFX11: ; %bb.0: ; %main_body
499 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
500 ; GFX11-NEXT: s_waitcnt vmcnt(0)
501 ; GFX11-NEXT: ; return to shader part epilog
503 ; NOPRT-LABEL: buffer_load_both:
504 ; NOPRT: ; %bb.0: ; %main_body
505 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
506 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
507 ; NOPRT-NEXT: ; return to shader part epilog
509 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
510 ret <4 x float> %data
; buffer_load_both_reversed: same as buffer_load_both but with the two i32
; arguments swapped (second argument is the index, first is the offset).
; The expected output copies v0 into v2 so that v[1:2] forms the
; (index, offset) pair for the idxen offen load.
513 define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
514 ; GFX6-LABEL: buffer_load_both_reversed:
515 ; GFX6: ; %bb.0: ; %main_body
516 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
517 ; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
518 ; GFX6-NEXT: s_waitcnt vmcnt(0)
519 ; GFX6-NEXT: ; return to shader part epilog
521 ; GFX8PLUS-LABEL: buffer_load_both_reversed:
522 ; GFX8PLUS: ; %bb.0: ; %main_body
523 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, v0
524 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
525 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
526 ; GFX8PLUS-NEXT: ; return to shader part epilog
528 ; GFX11-LABEL: buffer_load_both_reversed:
529 ; GFX11: ; %bb.0: ; %main_body
530 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
531 ; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
532 ; GFX11-NEXT: s_waitcnt vmcnt(0)
533 ; GFX11-NEXT: ; return to shader part epilog
535 ; NOPRT-LABEL: buffer_load_both_reversed:
536 ; NOPRT: ; %bb.0: ; %main_body
537 ; NOPRT-NEXT: v_mov_b32_e32 v2, v0
538 ; NOPRT-NEXT: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
539 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
540 ; NOPRT-NEXT: ; return to shader part epilog
542 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
543 ret <4 x float> %data
; buffer_load_x: the scalar f32 overload of the format-load intrinsic.
; The expected output is a single-component buffer_load_format_x into v0,
; addressed with idxen and a zero index.
546 define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
547 ; GFX6-LABEL: buffer_load_x:
548 ; GFX6: ; %bb.0: ; %main_body
549 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
550 ; GFX6-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
551 ; GFX6-NEXT: s_waitcnt vmcnt(0)
552 ; GFX6-NEXT: ; return to shader part epilog
554 ; GFX8PLUS-LABEL: buffer_load_x:
555 ; GFX8PLUS: ; %bb.0: ; %main_body
556 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
557 ; GFX8PLUS-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
558 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
559 ; GFX8PLUS-NEXT: ; return to shader part epilog
561 ; GFX11-LABEL: buffer_load_x:
562 ; GFX11: ; %bb.0: ; %main_body
563 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
564 ; GFX11-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
565 ; GFX11-NEXT: s_waitcnt vmcnt(0)
566 ; GFX11-NEXT: ; return to shader part epilog
568 ; NOPRT-LABEL: buffer_load_x:
569 ; NOPRT: ; %bb.0: ; %main_body
570 ; NOPRT-NEXT: v_mov_b32_e32 v0, 0
571 ; NOPRT-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
572 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
573 ; NOPRT-NEXT: ; return to shader part epilog
575 %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; buffer_load_x_i32: the scalar i32 overload of the format-load intrinsic,
; with the loaded value bitcast to float. The expected output is the same
; buffer_load_format_x sequence as for the f32 overload; the bitcast produces
; no instruction.
579 define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
580 ; GFX6-LABEL: buffer_load_x_i32:
581 ; GFX6: ; %bb.0: ; %main_body
582 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
583 ; GFX6-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
584 ; GFX6-NEXT: s_waitcnt vmcnt(0)
585 ; GFX6-NEXT: ; return to shader part epilog
587 ; GFX8PLUS-LABEL: buffer_load_x_i32:
588 ; GFX8PLUS: ; %bb.0: ; %main_body
589 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
590 ; GFX8PLUS-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
591 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
592 ; GFX8PLUS-NEXT: ; return to shader part epilog
594 ; GFX11-LABEL: buffer_load_x_i32:
595 ; GFX11: ; %bb.0: ; %main_body
596 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
597 ; GFX11-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
598 ; GFX11-NEXT: s_waitcnt vmcnt(0)
599 ; GFX11-NEXT: ; return to shader part epilog
601 ; NOPRT-LABEL: buffer_load_x_i32:
602 ; NOPRT: ; %bb.0: ; %main_body
603 ; NOPRT-NEXT: v_mov_b32_e32 v0, 0
604 ; NOPRT-NEXT: buffer_load_format_x v0, v0, s[0:3], 0 idxen
605 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
606 ; NOPRT-NEXT: ; return to shader part epilog
608 %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
609 %fdata = bitcast i32 %data to float
; buffer_load_xy: the <2 x float> overload of the format-load intrinsic.
; The expected output is a two-component buffer_load_format_xy into v[0:1],
; addressed with idxen and a zero index.
613 define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
614 ; GFX6-LABEL: buffer_load_xy:
615 ; GFX6: ; %bb.0: ; %main_body
616 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
617 ; GFX6-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
618 ; GFX6-NEXT: s_waitcnt vmcnt(0)
619 ; GFX6-NEXT: ; return to shader part epilog
621 ; GFX8PLUS-LABEL: buffer_load_xy:
622 ; GFX8PLUS: ; %bb.0: ; %main_body
623 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
624 ; GFX8PLUS-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
625 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
626 ; GFX8PLUS-NEXT: ; return to shader part epilog
628 ; GFX11-LABEL: buffer_load_xy:
629 ; GFX11: ; %bb.0: ; %main_body
630 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
631 ; GFX11-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
632 ; GFX11-NEXT: s_waitcnt vmcnt(0)
633 ; GFX11-NEXT: ; return to shader part epilog
635 ; NOPRT-LABEL: buffer_load_xy:
636 ; NOPRT: ; %bb.0: ; %main_body
637 ; NOPRT-NEXT: v_mov_b32_e32 v0, 0
638 ; NOPRT-NEXT: buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
639 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
640 ; NOPRT-NEXT: ; return to shader part epilog
642 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
643 ret <2 x float> %data
; buffer_load_v4i32_tfe: format load returning { <4 x i32>, i32 }.
; The expected output marks the load with the tfe modifier and gives it a
; five-dword destination v[2:6]: v[2:5] (the data) is stored to %out and v6
; (the extra i32) is moved to v0; the IR bitcasts that i32 to float, which is
; the function's return type.
; Runs with default features zero all of v2..v6 before the load. The run that
; passes -mattr=-enable-prt-strict-null zeroes only v6 and also uses it as the
; index operand.
; verde stores through a buffer_store with addr64, tonga through a flat store
; and gfx1100 through a global store.
646 define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
647 ; GFX6-LABEL: buffer_load_v4i32_tfe:
649 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
650 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
651 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
652 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
653 ; GFX6-NEXT: v_mov_b32_e32 v6, v2
654 ; GFX6-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
655 ; GFX6-NEXT: s_mov_b32 s2, 0
656 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
657 ; GFX6-NEXT: s_mov_b32 s0, s2
658 ; GFX6-NEXT: s_mov_b32 s1, s2
659 ; GFX6-NEXT: s_waitcnt vmcnt(0)
660 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
661 ; GFX6-NEXT: v_mov_b32_e32 v0, v6
662 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
663 ; GFX6-NEXT: ; return to shader part epilog
665 ; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
667 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
668 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
669 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
670 ; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2
671 ; GFX8PLUS-NEXT: v_mov_b32_e32 v6, v2
672 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
673 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
674 ; GFX8PLUS-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
675 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v6
676 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
677 ; GFX8PLUS-NEXT: ; return to shader part epilog
679 ; GFX11-LABEL: buffer_load_v4i32_tfe:
681 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
682 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
683 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
684 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
685 ; GFX11-NEXT: v_mov_b32_e32 v5, v2
686 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
687 ; GFX11-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
688 ; GFX11-NEXT: s_waitcnt vmcnt(0)
689 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
690 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
691 ; GFX11-NEXT: ; return to shader part epilog
693 ; NOPRT-LABEL: buffer_load_v4i32_tfe:
695 ; NOPRT-NEXT: v_mov_b32_e32 v6, 0
696 ; NOPRT-NEXT: buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
697 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
698 ; NOPRT-NEXT: global_store_b128 v[0:1], v[2:5], off
699 ; NOPRT-NEXT: v_mov_b32_e32 v0, v6
700 ; NOPRT-NEXT: ; return to shader part epilog
701 %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
702 %data = extractvalue { <4 x i32>, i32 } %load, 0
703 store <4 x i32> %data, ptr addrspace(1) %out
704 %status = extractvalue { <4 x i32>, i32 } %load, 1
705 %fstatus = bitcast i32 %status to float
; buffer_load_v4f32_tfe: format load returning { <4 x float>, i32 }, the
; float-data counterpart of the v4i32 variant. The expected output is the same
; shape: a tfe-marked load into v[2:6], with v[2:5] stored to %out and v6
; moved to v0; the IR bitcasts that i32 to float, the function's return type.
; Runs with default features zero all of v2..v6 before the load. The run that
; passes -mattr=-enable-prt-strict-null zeroes only v6 and also uses it as the
; index operand.
709 define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
710 ; GFX6-LABEL: buffer_load_v4f32_tfe:
712 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
713 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
714 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
715 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
716 ; GFX6-NEXT: v_mov_b32_e32 v6, v2
717 ; GFX6-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
718 ; GFX6-NEXT: s_mov_b32 s2, 0
719 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
720 ; GFX6-NEXT: s_mov_b32 s0, s2
721 ; GFX6-NEXT: s_mov_b32 s1, s2
722 ; GFX6-NEXT: s_waitcnt vmcnt(0)
723 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
724 ; GFX6-NEXT: v_mov_b32_e32 v0, v6
725 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
726 ; GFX6-NEXT: ; return to shader part epilog
728 ; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
730 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
731 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
732 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
733 ; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2
734 ; GFX8PLUS-NEXT: v_mov_b32_e32 v6, v2
735 ; GFX8PLUS-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
736 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
737 ; GFX8PLUS-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
738 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v6
739 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
740 ; GFX8PLUS-NEXT: ; return to shader part epilog
742 ; GFX11-LABEL: buffer_load_v4f32_tfe:
744 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
745 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
746 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
747 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
748 ; GFX11-NEXT: v_mov_b32_e32 v5, v2
749 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
750 ; GFX11-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
751 ; GFX11-NEXT: s_waitcnt vmcnt(0)
752 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
753 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
754 ; GFX11-NEXT: ; return to shader part epilog
756 ; NOPRT-LABEL: buffer_load_v4f32_tfe:
758 ; NOPRT-NEXT: v_mov_b32_e32 v6, 0
759 ; NOPRT-NEXT: buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
760 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
761 ; NOPRT-NEXT: global_store_b128 v[0:1], v[2:5], off
762 ; NOPRT-NEXT: v_mov_b32_e32 v0, v6
763 ; NOPRT-NEXT: ; return to shader part epilog
764 %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
765 %data = extractvalue { <4 x float>, i32 } %load, 0
766 store <4 x float> %data, ptr addrspace(1) %out
767 %status = extractvalue { <4 x float>, i32 } %load, 1
768 %fstatus = bitcast i32 %status to float
; TFE test for <3 x i32> data. Calls the struct.ptr.buffer.load.format
; overload that returns { <3 x i32>, i32 } with every i32 operand set to 0,
; stores struct element 0 to %out and bitcasts element 1 (%status) to float.
; All check groups expect buffer_load_format_xyz into v[2:5] with idxen tfe,
; and v5 copied into v0 afterwards. The GFX6 checks expect the store to be
; split into a dword store at offset:8 plus a dwordx2 store; the GFX8PLUS
; checks expect a single flat_store_dwordx3 and the GFX11 / NOPRT checks a
; single global_store_b96.
772 define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
773 ; GFX6-LABEL: buffer_load_v3i32_tfe:
775 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
776 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
777 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
778 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
779 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
780 ; GFX6-NEXT: s_mov_b32 s2, 0
781 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
782 ; GFX6-NEXT: s_mov_b32 s0, s2
783 ; GFX6-NEXT: s_mov_b32 s1, s2
784 ; GFX6-NEXT: s_waitcnt vmcnt(0)
785 ; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
786 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
787 ; GFX6-NEXT: v_mov_b32_e32 v0, v5
788 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
789 ; GFX6-NEXT: ; return to shader part epilog
791 ; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
793 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
794 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
795 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
796 ; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2
797 ; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
798 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
799 ; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4]
800 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5
801 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
802 ; GFX8PLUS-NEXT: ; return to shader part epilog
804 ; GFX11-LABEL: buffer_load_v3i32_tfe:
806 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
807 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
808 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
809 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
810 ; GFX11-NEXT: v_mov_b32_e32 v5, v2
811 ; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
812 ; GFX11-NEXT: s_waitcnt vmcnt(0)
813 ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
814 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
815 ; GFX11-NEXT: ; return to shader part epilog
817 ; NOPRT-LABEL: buffer_load_v3i32_tfe:
819 ; NOPRT-NEXT: v_mov_b32_e32 v5, 0
820 ; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
821 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
822 ; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off
823 ; NOPRT-NEXT: v_mov_b32_e32 v0, v5
824 ; NOPRT-NEXT: ; return to shader part epilog
825 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
826 %data = extractvalue { <3 x i32>, i32 } %load, 0
827 store <3 x i32> %data, ptr addrspace(1) %out
828 %status = extractvalue { <3 x i32>, i32 } %load, 1
829 %fstatus = bitcast i32 %status to float
; TFE test for <3 x float> data. Calls the struct.ptr.buffer.load.format
; overload that returns { <3 x float>, i32 } with every i32 operand set to 0,
; stores struct element 0 to %out and bitcasts element 1 (%status) to float.
; The expected instruction sequences are the same as for the <3 x i32>
; variant of this test: buffer_load_format_xyz into v[2:5] with idxen tfe,
; a split dword + dwordx2 store in the GFX6 checks, and a single 96-bit
; store in the other check groups.
833 define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
834 ; GFX6-LABEL: buffer_load_v3f32_tfe:
836 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
837 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
838 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
839 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
840 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
841 ; GFX6-NEXT: s_mov_b32 s2, 0
842 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
843 ; GFX6-NEXT: s_mov_b32 s0, s2
844 ; GFX6-NEXT: s_mov_b32 s1, s2
845 ; GFX6-NEXT: s_waitcnt vmcnt(0)
846 ; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
847 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
848 ; GFX6-NEXT: v_mov_b32_e32 v0, v5
849 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
850 ; GFX6-NEXT: ; return to shader part epilog
852 ; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
854 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
855 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
856 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
857 ; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2
858 ; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
859 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
860 ; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4]
861 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5
862 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
863 ; GFX8PLUS-NEXT: ; return to shader part epilog
865 ; GFX11-LABEL: buffer_load_v3f32_tfe:
867 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
868 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
869 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
870 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
871 ; GFX11-NEXT: v_mov_b32_e32 v5, v2
872 ; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
873 ; GFX11-NEXT: s_waitcnt vmcnt(0)
874 ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
875 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
876 ; GFX11-NEXT: ; return to shader part epilog
878 ; NOPRT-LABEL: buffer_load_v3f32_tfe:
880 ; NOPRT-NEXT: v_mov_b32_e32 v5, 0
881 ; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
882 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
883 ; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off
884 ; NOPRT-NEXT: v_mov_b32_e32 v0, v5
885 ; NOPRT-NEXT: ; return to shader part epilog
886 %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
887 %data = extractvalue { <3 x float>, i32 } %load, 0
888 store <3 x float> %data, ptr addrspace(1) %out
889 %status = extractvalue { <3 x float>, i32 } %load, 1
890 %fstatus = bitcast i32 %status to float
; TFE test for <2 x i32> data. Calls the struct.ptr.buffer.load.format
; overload that returns { <2 x i32>, i32 } with every i32 operand set to 0,
; stores struct element 0 to %out and bitcasts element 1 (%status) to float.
; The GFX8PLUS, GFX11 and NOPRT checks expect buffer_load_format_xy into
; v[2:4] with idxen tfe; every check group copies v4 into v0 afterwards.
; NOTE(review): the GFX6 checks instead expect the wider
; buffer_load_format_xyz into v[2:5] while still reading v4 for the value
; moved to v0 - confirm this recorded output is the intended GFX6 lowering.
894 define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
895 ; GFX6-LABEL: buffer_load_v2i32_tfe:
897 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
898 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
899 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
900 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
901 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
902 ; GFX6-NEXT: s_mov_b32 s2, 0
903 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
904 ; GFX6-NEXT: s_mov_b32 s0, s2
905 ; GFX6-NEXT: s_mov_b32 s1, s2
906 ; GFX6-NEXT: s_waitcnt vmcnt(0)
907 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
908 ; GFX6-NEXT: v_mov_b32_e32 v0, v4
909 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
910 ; GFX6-NEXT: ; return to shader part epilog
912 ; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
914 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
915 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
916 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
917 ; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
918 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
919 ; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
920 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4
921 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
922 ; GFX8PLUS-NEXT: ; return to shader part epilog
924 ; GFX11-LABEL: buffer_load_v2i32_tfe:
926 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
927 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
928 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
929 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
930 ; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
931 ; GFX11-NEXT: s_waitcnt vmcnt(0)
932 ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
933 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
934 ; GFX11-NEXT: ; return to shader part epilog
936 ; NOPRT-LABEL: buffer_load_v2i32_tfe:
938 ; NOPRT-NEXT: v_mov_b32_e32 v4, 0
939 ; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
940 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
941 ; NOPRT-NEXT: global_store_b64 v[0:1], v[2:3], off
942 ; NOPRT-NEXT: v_mov_b32_e32 v0, v4
943 ; NOPRT-NEXT: ; return to shader part epilog
944 %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
945 %data = extractvalue { <2 x i32>, i32 } %load, 0
946 store <2 x i32> %data, ptr addrspace(1) %out
947 %status = extractvalue { <2 x i32>, i32 } %load, 1
948 %fstatus = bitcast i32 %status to float
; TFE test for <2 x float> data. Calls the struct.ptr.buffer.load.format
; overload that returns { <2 x float>, i32 } with every i32 operand set to 0,
; stores struct element 0 to %out and bitcasts element 1 (%status) to float.
; The GFX8PLUS, GFX11 and NOPRT checks expect buffer_load_format_xy into
; v[2:4] with idxen tfe; every check group copies v4 into v0 afterwards.
; NOTE(review): the GFX6 checks instead expect the wider
; buffer_load_format_xyz into v[2:5] while still reading v4 for the value
; moved to v0 - confirm this recorded output is the intended GFX6 lowering.
952 define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
953 ; GFX6-LABEL: buffer_load_v2f32_tfe:
955 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
956 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
957 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
958 ; GFX6-NEXT: v_mov_b32_e32 v5, v2
959 ; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
960 ; GFX6-NEXT: s_mov_b32 s2, 0
961 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
962 ; GFX6-NEXT: s_mov_b32 s0, s2
963 ; GFX6-NEXT: s_mov_b32 s1, s2
964 ; GFX6-NEXT: s_waitcnt vmcnt(0)
965 ; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
966 ; GFX6-NEXT: v_mov_b32_e32 v0, v4
967 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
968 ; GFX6-NEXT: ; return to shader part epilog
970 ; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
972 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
973 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
974 ; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2
975 ; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
976 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
977 ; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
978 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4
979 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
980 ; GFX8PLUS-NEXT: ; return to shader part epilog
982 ; GFX11-LABEL: buffer_load_v2f32_tfe:
984 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
985 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
986 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
987 ; GFX11-NEXT: v_mov_b32_e32 v4, v2
988 ; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
989 ; GFX11-NEXT: s_waitcnt vmcnt(0)
990 ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
991 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
992 ; GFX11-NEXT: ; return to shader part epilog
994 ; NOPRT-LABEL: buffer_load_v2f32_tfe:
996 ; NOPRT-NEXT: v_mov_b32_e32 v4, 0
997 ; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
998 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
999 ; NOPRT-NEXT: global_store_b64 v[0:1], v[2:3], off
1000 ; NOPRT-NEXT: v_mov_b32_e32 v0, v4
1001 ; NOPRT-NEXT: ; return to shader part epilog
1002 %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1003 %data = extractvalue { <2 x float>, i32 } %load, 0
1004 store <2 x float> %data, ptr addrspace(1) %out
1005 %status = extractvalue { <2 x float>, i32 } %load, 1
1006 %fstatus = bitcast i32 %status to float
; TFE test for scalar i32 data. Calls the struct.ptr.buffer.load.format
; overload that returns { i32, i32 } with every i32 operand set to 0, stores
; struct element 0 to %out and bitcasts element 1 (%status) to float.
; All check groups expect buffer_load_format_x into v[2:3] with idxen tfe,
; a single-dword store of v2, and v3 copied into v0 afterwards. The NOPRT
; checks expect only v3 to be zeroed before the load; the other check groups
; expect both v2 and v3 to be initialised.
1010 define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
1011 ; GFX6-LABEL: buffer_load_i32_tfe:
1013 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
1014 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
1015 ; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1016 ; GFX6-NEXT: s_mov_b32 s2, 0
1017 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1018 ; GFX6-NEXT: s_mov_b32 s0, s2
1019 ; GFX6-NEXT: s_mov_b32 s1, s2
1020 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1021 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1022 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
1023 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1024 ; GFX6-NEXT: ; return to shader part epilog
1026 ; GFX8PLUS-LABEL: buffer_load_i32_tfe:
1027 ; GFX8PLUS: ; %bb.0:
1028 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
1029 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
1030 ; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1031 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
1032 ; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2
1033 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3
1034 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
1035 ; GFX8PLUS-NEXT: ; return to shader part epilog
1037 ; GFX11-LABEL: buffer_load_i32_tfe:
1039 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1040 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1041 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
1042 ; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1043 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1044 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1045 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
1046 ; GFX11-NEXT: ; return to shader part epilog
1048 ; NOPRT-LABEL: buffer_load_i32_tfe:
1050 ; NOPRT-NEXT: v_mov_b32_e32 v3, 0
1051 ; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
1052 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
1053 ; NOPRT-NEXT: global_store_b32 v[0:1], v2, off
1054 ; NOPRT-NEXT: v_mov_b32_e32 v0, v3
1055 ; NOPRT-NEXT: ; return to shader part epilog
1056 %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1057 %data = extractvalue { i32, i32 } %load, 0
1058 store i32 %data, ptr addrspace(1) %out
1059 %status = extractvalue { i32, i32 } %load, 1
1060 %fstatus = bitcast i32 %status to float
; TFE test for scalar float data. Calls the struct.ptr.buffer.load.format
; overload that returns { float, i32 } with every i32 operand set to 0,
; stores struct element 0 to %out and bitcasts element 1 (%status) to float.
; The expected instruction sequences are the same as for the scalar i32
; variant of this test: buffer_load_format_x into v[2:3] with idxen tfe, a
; single-dword store of v2, and v3 copied into v0 afterwards.
1064 define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
1065 ; GFX6-LABEL: buffer_load_f32_tfe:
1067 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
1068 ; GFX6-NEXT: v_mov_b32_e32 v3, v2
1069 ; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1070 ; GFX6-NEXT: s_mov_b32 s2, 0
1071 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1072 ; GFX6-NEXT: s_mov_b32 s0, s2
1073 ; GFX6-NEXT: s_mov_b32 s1, s2
1074 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1075 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1076 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
1077 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1078 ; GFX6-NEXT: ; return to shader part epilog
1080 ; GFX8PLUS-LABEL: buffer_load_f32_tfe:
1081 ; GFX8PLUS: ; %bb.0:
1082 ; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0
1083 ; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2
1084 ; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1085 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
1086 ; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2
1087 ; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3
1088 ; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
1089 ; GFX8PLUS-NEXT: ; return to shader part epilog
1091 ; GFX11-LABEL: buffer_load_f32_tfe:
1093 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1094 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1095 ; GFX11-NEXT: v_mov_b32_e32 v3, v2
1096 ; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1097 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1098 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1099 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
1100 ; GFX11-NEXT: ; return to shader part epilog
1102 ; NOPRT-LABEL: buffer_load_f32_tfe:
1104 ; NOPRT-NEXT: v_mov_b32_e32 v3, 0
1105 ; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
1106 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
1107 ; NOPRT-NEXT: global_store_b32 v[0:1], v2, off
1108 ; NOPRT-NEXT: v_mov_b32_e32 v0, v3
1109 ; NOPRT-NEXT: ; return to shader part epilog
1110 %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1111 %data = extractvalue { float, i32 } %load, 0
1112 store float %data, ptr addrspace(1) %out
1113 %status = extractvalue { float, i32 } %load, 1
1114 %fstatus = bitcast i32 %status to float
; Declarations of the llvm.amdgcn.struct.ptr.buffer.load.format overloads.
; The first four return the loaded value directly (float, <2 x float>,
; <4 x float>, i32). The eight sl_* overloads return a { data, i32 } struct
; and mark their final i32 operand immarg; the first four declarations carry
; no immarg marker. Every declaration uses attribute group #0, defined on
; the last line as nounwind readonly.
1118 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
1119 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
1120 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
1121 declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
1122 declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1123 declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1124 declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1125 declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1126 declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1127 declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1128 declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1129 declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1130 attributes #0 = { nounwind readonly }