1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
3 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
4 ;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
5 ;RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX11 %s
; Four tbuffer loads from the same inreg resource with both offset operands
; set to 0. The fourth intrinsic operand (78, 63, 22, 22) is what the expected
; instructions print in their format field, either symbolically or as a raw
; number. The fifth operand (0, 1, 2, 5) selects the expected cache modifiers:
; none, glc, slc, and glc (plus dlc in the gfx1010 and gfx1100 expectations).
7 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
8 ; PREGFX10-LABEL: tbuffer_load:
9 ; PREGFX10: ; %bb.0: ; %main_body
10 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT]
11 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] glc
12 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] slc
13 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] glc
14 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
15 ; PREGFX10-NEXT: ; return to shader part epilog
17 ; GFX10-LABEL: tbuffer_load:
18 ; GFX10: ; %bb.0: ; %main_body
19 ; GFX10-NEXT: s_clause 0x3
20 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
21 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] glc
22 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
23 ; GFX10-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
24 ; GFX10-NEXT: s_waitcnt vmcnt(0)
25 ; GFX10-NEXT: ; return to shader part epilog
27 ; GFX11-LABEL: tbuffer_load:
28 ; GFX11: ; %bb.0: ; %main_body
29 ; GFX11-NEXT: s_clause 0x3
30 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
31 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] glc
32 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
33 ; GFX11-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
34 ; GFX11-NEXT: s_waitcnt vmcnt(0)
35 ; GFX11-NEXT: ; return to shader part epilog
37 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 78, i32 0)
38 %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 63, i32 1)
39 %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 2)
40 %vdata_f32 = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 5)
41 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
42 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
43 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
44 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
45 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
46 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
47 %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
48 ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
; A constant 42 passed as the second intrinsic operand is expected to be
; emitted as the instruction's immediate field (offset:42), with no VGPR
; address operand ("off").
51 define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
52 ; PREGFX10-LABEL: tbuffer_load_immoffs:
53 ; PREGFX10: ; %bb.0: ; %main_body
54 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offset:42
55 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
56 ; PREGFX10-NEXT: ; return to shader part epilog
58 ; GFX10-LABEL: tbuffer_load_immoffs:
59 ; GFX10: ; %bb.0: ; %main_body
60 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
61 ; GFX10-NEXT: s_waitcnt vmcnt(0)
62 ; GFX10-NEXT: ; return to shader part epilog
64 ; GFX11-LABEL: tbuffer_load_immoffs:
65 ; GFX11: ; %bb.0: ; %main_body
66 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
67 ; GFX11-NEXT: s_waitcnt vmcnt(0)
68 ; GFX11-NEXT: ; return to shader part epilog
70 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 42, i32 0, i32 78, i32 0)
71 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
72 ret <4 x float> %vdata.f
; Constant offset 4092 (second intrinsic operand): the expectations keep the
; whole value in the immediate field (offset:4092) and use no VGPR address
; operand ("off"), i.e. no v_mov/offen split is expected at this size.
75 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
76 ; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
77 ; PREGFX10: ; %bb.0: ; %main_body
78 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
79 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
80 ; PREGFX10-NEXT: ; return to shader part epilog
82 ; GFX10-LABEL: buffer_load_voffset_large_12bit:
83 ; GFX10: ; %bb.0: ; %main_body
84 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
85 ; GFX10-NEXT: s_waitcnt vmcnt(0)
86 ; GFX10-NEXT: ; return to shader part epilog
88 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
89 ; GFX11: ; %bb.0: ; %main_body
90 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
91 ; GFX11-NEXT: s_waitcnt vmcnt(0)
92 ; GFX11-NEXT: ; return to shader part epilog
94 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0)
; Constant offset 8188 (= 0x1000 + 4092): the expectations split it into a
; v_mov of 0x1000 into v0, used as the VGPR address with offen, plus the
; remaining 4092 in the immediate field.
98 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
99 ; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
100 ; PREGFX10: ; %bb.0: ; %main_body
101 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
102 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
103 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
104 ; PREGFX10-NEXT: ; return to shader part epilog
106 ; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
107 ; GFX10: ; %bb.0: ; %main_body
108 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
109 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
110 ; GFX10-NEXT: s_waitcnt vmcnt(0)
111 ; GFX10-NEXT: ; return to shader part epilog
113 ; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
114 ; GFX11: ; %bb.0: ; %main_body
115 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
116 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
117 ; GFX11-NEXT: s_waitcnt vmcnt(0)
118 ; GFX11-NEXT: ; return to shader part epilog
120 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0)
121 ret <4 x float> %data
; Constant offset 65532 (= 0xf000 + 4092): expected to be split into a v_mov
; of 0xf000 into v0 (used with offen) plus offset:4092 in the immediate field.
124 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
125 ; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
126 ; PREGFX10: ; %bb.0: ; %main_body
127 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
128 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
129 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
130 ; PREGFX10-NEXT: ; return to shader part epilog
132 ; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
133 ; GFX10: ; %bb.0: ; %main_body
134 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
135 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
136 ; GFX10-NEXT: s_waitcnt vmcnt(0)
137 ; GFX10-NEXT: ; return to shader part epilog
139 ; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
140 ; GFX11: ; %bb.0: ; %main_body
141 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
142 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
143 ; GFX11-NEXT: s_waitcnt vmcnt(0)
144 ; GFX11-NEXT: ; return to shader part epilog
146 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0)
147 ret <4 x float> %data
; Constant offset 8388604 (= 0x7ff000 + 4092): expected to be split into a
; v_mov of 0x7ff000 into v0 (used with offen) plus offset:4092 in the
; immediate field.
150 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
151 ; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
152 ; PREGFX10: ; %bb.0: ; %main_body
153 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
154 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
155 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
156 ; PREGFX10-NEXT: ; return to shader part epilog
158 ; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
159 ; GFX10: ; %bb.0: ; %main_body
160 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
161 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
162 ; GFX10-NEXT: s_waitcnt vmcnt(0)
163 ; GFX10-NEXT: ; return to shader part epilog
165 ; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
166 ; GFX11: ; %bb.0: ; %main_body
167 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
168 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
169 ; GFX11-NEXT: s_waitcnt vmcnt(0)
170 ; GFX11-NEXT: ; return to shader part epilog
172 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0)
173 ret <4 x float> %data
; Constant offset 16777212 (= 0xfff000 + 4092): expected to be split into a
; v_mov of 0xfff000 into v0 (used with offen) plus offset:4092 in the
; immediate field.
176 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
177 ; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
178 ; PREGFX10: ; %bb.0: ; %main_body
179 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
180 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
181 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
182 ; PREGFX10-NEXT: ; return to shader part epilog
184 ; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
185 ; GFX10: ; %bb.0: ; %main_body
186 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
187 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
188 ; GFX10-NEXT: s_waitcnt vmcnt(0)
189 ; GFX10-NEXT: ; return to shader part epilog
191 ; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
192 ; GFX11: ; %bb.0: ; %main_body
193 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
194 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
195 ; GFX11-NEXT: s_waitcnt vmcnt(0)
196 ; GFX11-NEXT: ; return to shader part epilog
198 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 63, i32 0)
199 ret <4 x float> %data
; Combines an immediate offset (second operand: 4095, 73, 1) with a scalar
; offset (third operand: the constant 61, then the inreg argument %soffs).
; The expectations print the third operand as the scalar-offset operand
; (61, then s4) and the second operand as offset:N, with no VGPR address.
; NOTE(review): %vdata_glc and %vdata_slc are only value names in this
; function; all three calls pass 0 as the last operand and no glc/slc
; modifier appears in the expected output.
202 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
203 ; PREGFX10-LABEL: tbuffer_load_immoffs_large:
205 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
206 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
207 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
208 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
209 ; PREGFX10-NEXT: ; return to shader part epilog
211 ; GFX10-LABEL: tbuffer_load_immoffs_large:
213 ; GFX10-NEXT: s_clause 0x2
214 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
215 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_UINT] offset:73
216 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] offset:1
217 ; GFX10-NEXT: s_waitcnt vmcnt(0)
218 ; GFX10-NEXT: ; return to shader part epilog
220 ; GFX11-LABEL: tbuffer_load_immoffs_large:
222 ; GFX11-NEXT: s_clause 0x2
223 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
224 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
225 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
226 ; GFX11-NEXT: s_waitcnt vmcnt(0)
227 ; GFX11-NEXT: ; return to shader part epilog
228 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 4095, i32 61, i32 47, i32 0)
229 %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 73, i32 %soffs, i32 62, i32 0)
230 %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 1, i32 %soffs, i32 77, i32 0)
231 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
232 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
233 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
234 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
235 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
236 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
237 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; The second intrinsic operand is the non-inreg argument %voffs: the
; expectations use v0 as the address operand with the offen modifier and no
; immediate offset.
240 define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
241 ; PREGFX10-LABEL: tbuffer_load_ofs:
242 ; PREGFX10: ; %bb.0: ; %main_body
243 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen
244 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
245 ; PREGFX10-NEXT: ; return to shader part epilog
247 ; GFX10-LABEL: tbuffer_load_ofs:
248 ; GFX10: ; %bb.0: ; %main_body
249 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
250 ; GFX10-NEXT: s_waitcnt vmcnt(0)
251 ; GFX10-NEXT: ; return to shader part epilog
253 ; GFX11-LABEL: tbuffer_load_ofs:
254 ; GFX11: ; %bb.0: ; %main_body
255 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
256 ; GFX11-NEXT: s_waitcnt vmcnt(0)
257 ; GFX11-NEXT: ; return to shader part epilog
259 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %voffs, i32 0, i32 78, i32 0)
260 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
261 ret <4 x float> %vdata.f
; The offset operand is %voffs + 52: the expectations contain no separate add
; instruction; the constant 52 appears as offset:52 next to the v0/offen
; address operand.
264 define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
265 ; PREGFX10-LABEL: tbuffer_load_ofs_imm:
266 ; PREGFX10: ; %bb.0: ; %main_body
267 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen offset:52
268 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
269 ; PREGFX10-NEXT: ; return to shader part epilog
271 ; GFX10-LABEL: tbuffer_load_ofs_imm:
272 ; GFX10: ; %bb.0: ; %main_body
273 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
274 ; GFX10-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10-NEXT: ; return to shader part epilog
277 ; GFX11-LABEL: tbuffer_load_ofs_imm:
278 ; GFX11: ; %bb.0: ; %main_body
279 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
280 ; GFX11-NEXT: s_waitcnt vmcnt(0)
281 ; GFX11-NEXT: ; return to shader part epilog
283 %ofs = add i32 %voffs, 52
284 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %ofs, i32 0, i32 78, i32 0)
285 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
286 ret <4 x float> %vdata.f
; Two-component result: the v2i32 overload of the intrinsic is expected to
; select tbuffer_load_format_xy writing the register pair v[0:1].
289 define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
290 ; PREGFX10-LABEL: buffer_load_xy:
292 ; PREGFX10-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
293 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
294 ; PREGFX10-NEXT: ; return to shader part epilog
296 ; GFX10-LABEL: buffer_load_xy:
298 ; GFX10-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
299 ; GFX10-NEXT: s_waitcnt vmcnt(0)
300 ; GFX10-NEXT: ; return to shader part epilog
302 ; GFX11-LABEL: buffer_load_xy:
304 ; GFX11-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:77
305 ; GFX11-NEXT: s_waitcnt vmcnt(0)
306 ; GFX11-NEXT: ; return to shader part epilog
307 %vdata = call <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
308 %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
309 ret <2 x float> %vdata.f
; Single-component result: the i32 overload of the intrinsic is expected to
; select tbuffer_load_format_x writing the single register v0.
312 define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
313 ; PREGFX10-LABEL: buffer_load_x:
315 ; PREGFX10-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
316 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
317 ; PREGFX10-NEXT: ; return to shader part epilog
319 ; GFX10-LABEL: buffer_load_x:
321 ; GFX10-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
322 ; GFX10-NEXT: s_waitcnt vmcnt(0)
323 ; GFX10-NEXT: ; return to shader part epilog
325 ; GFX11-LABEL: buffer_load_x:
327 ; GFX11-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:77
328 ; GFX11-NEXT: s_waitcnt vmcnt(0)
329 ; GFX11-NEXT: ; return to shader part epilog
330 %vdata = call i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
331 %vdata.f = bitcast i32 %vdata to float
; Declarations of the llvm.amdgcn.raw.tbuffer.load overloads called in this
; file (i32, v2i32, v4i32 and v4f32 results). Each takes a <4 x i32> operand
; followed by four i32 operands. Judging from the expected assembly in this
; file, those four are the offset, scalar offset, format and cache-policy
; values, in that order (presumably; confirm against the intrinsic definition).
335 declare i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32)
336 declare <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32)
337 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32)
338 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)