1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
4 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
5 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX11 %s
6 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s
7 ;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s
; tbuffer_load: four raw tbuffer loads from the same resource with zero offset
; and zero soffset, differing only in the format operand (78, 63, 22, 22) and
; the cache-policy operand (0, 1, 2, 5).
;
; What the checks below pin down:
;  * format 78 is printed symbolically on pre-GFX10 targets and as the raw
;    number on GFX10 and later; formats 63 and 22 are printed symbolically
;    everywhere, but the symbolic name differs per generation.
;  * cache policy 1 -> glc (th:TH_LOAD_NT on GFX12 targets),
;    cache policy 2 -> slc (th:TH_LOAD_HT on GFX12 targets),
;    cache policy 5 -> glc only on pre-GFX10, glc dlc on GFX10/GFX11 targets,
;    th:TH_LOAD_RT_NT on GFX12 targets.
;  * GFX10 and later group the four loads under a single s_clause 0x3.
;  * GFX12 targets print the zero soffset as null and wait with s_wait_loadcnt.
;
; The result aggregate is seeded with poison rather than undef: every field is
; overwritten by the insertvalue chain, so the seed never reaches codegen.
9 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
10 ; PREGFX10-LABEL: tbuffer_load:
11 ; PREGFX10: ; %bb.0: ; %main_body
12 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT]
13 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] glc
14 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] slc
15 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] glc
16 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
17 ; PREGFX10-NEXT: ; return to shader part epilog
19 ; GFX10-LABEL: tbuffer_load:
20 ; GFX10: ; %bb.0: ; %main_body
21 ; GFX10-NEXT: s_clause 0x3
22 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
23 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] glc
24 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
25 ; GFX10-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
26 ; GFX10-NEXT: s_waitcnt vmcnt(0)
27 ; GFX10-NEXT: ; return to shader part epilog
29 ; GFX11-LABEL: tbuffer_load:
30 ; GFX11: ; %bb.0: ; %main_body
31 ; GFX11-NEXT: s_clause 0x3
32 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
33 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] glc
34 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
35 ; GFX11-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
36 ; GFX11-NEXT: s_waitcnt vmcnt(0)
37 ; GFX11-NEXT: ; return to shader part epilog
39 ; GFX12-LABEL: tbuffer_load:
40 ; GFX12: ; %bb.0: ; %main_body
41 ; GFX12-NEXT: s_clause 0x3
42 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78
43 ; GFX12-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] th:TH_LOAD_NT
44 ; GFX12-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_HT
45 ; GFX12-NEXT: tbuffer_load_format_xyzw v[12:15], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_RT_NT
46 ; GFX12-NEXT: s_wait_loadcnt 0x0
47 ; GFX12-NEXT: ; return to shader part epilog
; Operands after the resource: offset, soffset, format, cache policy.
49 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 78, i32 0)
50 %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 63, i32 1)
51 %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 2)
; The fourth load uses the v4f32 overload directly, so it needs no bitcast.
52 %vdata_f32 = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 5)
53 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
54 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
55 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
; Pack all four results into the return aggregate (v[0:15] in the checks).
56 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
57 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
58 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
59 %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
60 ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
; tbuffer_load_immoffs: a single load whose offset operand is the constant 42.
; The checks expect the constant to appear as the instruction's immediate
; `offset:42` field, with no VGPR offset (`off`), on every tested target.
63 define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
64 ; PREGFX10-LABEL: tbuffer_load_immoffs:
65 ; PREGFX10: ; %bb.0: ; %main_body
66 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offset:42
67 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
68 ; PREGFX10-NEXT: ; return to shader part epilog
70 ; GFX10-LABEL: tbuffer_load_immoffs:
71 ; GFX10: ; %bb.0: ; %main_body
72 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
73 ; GFX10-NEXT: s_waitcnt vmcnt(0)
74 ; GFX10-NEXT: ; return to shader part epilog
76 ; GFX11-LABEL: tbuffer_load_immoffs:
77 ; GFX11: ; %bb.0: ; %main_body
78 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
79 ; GFX11-NEXT: s_waitcnt vmcnt(0)
80 ; GFX11-NEXT: ; return to shader part epilog
82 ; GFX12-LABEL: tbuffer_load_immoffs:
83 ; GFX12: ; %bb.0: ; %main_body
84 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78 offset:42
85 ; GFX12-NEXT: s_wait_loadcnt 0x0
86 ; GFX12-NEXT: ; return to shader part epilog
; offset = 42, soffset = 0, format = 78, cache policy = 0.
88 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 42, i32 0, i32 78, i32 0)
89 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
90 ret <4 x float> %vdata.f
; buffer_load_voffset_large_12bit: constant offset 4092. The checks expect the
; whole value to be carried by the immediate `offset:4092` field, with no VGPR
; offset, on every tested target.
; NOTE(review): this function is named buffer_load_* although it calls the
; tbuffer intrinsic like its tbuffer_load_voffset_large_* siblings; renaming
; would require regenerating the check lines.
93 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
94 ; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
95 ; PREGFX10: ; %bb.0: ; %main_body
96 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
97 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
98 ; PREGFX10-NEXT: ; return to shader part epilog
100 ; GFX10-LABEL: buffer_load_voffset_large_12bit:
101 ; GFX10: ; %bb.0: ; %main_body
102 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
103 ; GFX10-NEXT: s_waitcnt vmcnt(0)
104 ; GFX10-NEXT: ; return to shader part epilog
106 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
107 ; GFX11: ; %bb.0: ; %main_body
108 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
109 ; GFX11-NEXT: s_waitcnt vmcnt(0)
110 ; GFX11-NEXT: ; return to shader part epilog
112 ; GFX12-LABEL: buffer_load_voffset_large_12bit:
113 ; GFX12: ; %bb.0: ; %main_body
114 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
115 ; GFX12-NEXT: s_wait_loadcnt 0x0
116 ; GFX12-NEXT: ; return to shader part epilog
; offset = 4092, soffset = 0, format = 63, cache policy = 0.
118 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0)
119 ret <4 x float> %data
; tbuffer_load_voffset_large_13bit: constant offset 8188.
; Before GFX12 the checks expect the constant to be split: 0x1000 is moved into
; a VGPR used with `offen`, and the remaining 4092 goes in the immediate field
; (0x1000 + 4092 = 8188). On GFX12 targets the checks expect the whole value as
; `offset:8188` with no VGPR offset.
122 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
123 ; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
124 ; PREGFX10: ; %bb.0: ; %main_body
125 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
126 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
127 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
128 ; PREGFX10-NEXT: ; return to shader part epilog
130 ; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
131 ; GFX10: ; %bb.0: ; %main_body
132 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
133 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
134 ; GFX10-NEXT: s_waitcnt vmcnt(0)
135 ; GFX10-NEXT: ; return to shader part epilog
137 ; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
138 ; GFX11: ; %bb.0: ; %main_body
139 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
140 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
141 ; GFX11-NEXT: s_waitcnt vmcnt(0)
142 ; GFX11-NEXT: ; return to shader part epilog
144 ; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
145 ; GFX12: ; %bb.0: ; %main_body
146 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8188
147 ; GFX12-NEXT: s_wait_loadcnt 0x0
148 ; GFX12-NEXT: ; return to shader part epilog
; offset = 8188, soffset = 0, format = 63, cache policy = 0.
150 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0)
151 ret <4 x float> %data
; tbuffer_load_voffset_large_16bit: constant offset 65532.
; Before GFX12 the checks expect a split into a VGPR holding 0xf000 (used with
; `offen`) plus immediate 4092 (0xf000 + 4092 = 65532). On GFX12 targets the
; checks expect the whole value as `offset:65532` with no VGPR offset.
154 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
155 ; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
156 ; PREGFX10: ; %bb.0: ; %main_body
157 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
158 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
159 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
160 ; PREGFX10-NEXT: ; return to shader part epilog
162 ; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
163 ; GFX10: ; %bb.0: ; %main_body
164 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
165 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
166 ; GFX10-NEXT: s_waitcnt vmcnt(0)
167 ; GFX10-NEXT: ; return to shader part epilog
169 ; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
170 ; GFX11: ; %bb.0: ; %main_body
171 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
172 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
173 ; GFX11-NEXT: s_waitcnt vmcnt(0)
174 ; GFX11-NEXT: ; return to shader part epilog
176 ; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
177 ; GFX12: ; %bb.0: ; %main_body
178 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:65532
179 ; GFX12-NEXT: s_wait_loadcnt 0x0
180 ; GFX12-NEXT: ; return to shader part epilog
; offset = 65532, soffset = 0, format = 63, cache policy = 0.
182 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0)
183 ret <4 x float> %data
; tbuffer_load_voffset_large_23bit: constant offset 8388604.
; Before GFX12 the checks expect a split into a VGPR holding 0x7ff000 (used
; with `offen`) plus immediate 4092 (0x7ff000 + 4092 = 8388604). On GFX12
; targets the checks expect the whole value as `offset:8388604` with no VGPR
; offset.
186 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
187 ; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
188 ; PREGFX10: ; %bb.0: ; %main_body
189 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
190 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
191 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
192 ; PREGFX10-NEXT: ; return to shader part epilog
194 ; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
195 ; GFX10: ; %bb.0: ; %main_body
196 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
197 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
198 ; GFX10-NEXT: s_waitcnt vmcnt(0)
199 ; GFX10-NEXT: ; return to shader part epilog
201 ; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
202 ; GFX11: ; %bb.0: ; %main_body
203 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
204 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
205 ; GFX11-NEXT: s_waitcnt vmcnt(0)
206 ; GFX11-NEXT: ; return to shader part epilog
208 ; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
209 ; GFX12: ; %bb.0: ; %main_body
210 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8388604
211 ; GFX12-NEXT: s_wait_loadcnt 0x0
212 ; GFX12-NEXT: ; return to shader part epilog
; offset = 8388604, soffset = 0, format = 63, cache policy = 0.
214 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0)
215 ret <4 x float> %data
; tbuffer_load_voffset_large_24bit: constant offset 16777212. This is the one
; large-offset case where the checks expect a split on every tested target.
; Before GFX12 the split is a VGPR holding 0xfff000 plus immediate 4092
; (0xfff000 + 4092 = 16777212). On GFX12 targets it is a VGPR holding 0x800000
; plus immediate 8388604 (0x800000 + 8388604 = 16777212).
218 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
219 ; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
220 ; PREGFX10: ; %bb.0: ; %main_body
221 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
222 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
223 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
224 ; PREGFX10-NEXT: ; return to shader part epilog
226 ; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
227 ; GFX10: ; %bb.0: ; %main_body
228 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
229 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
230 ; GFX10-NEXT: s_waitcnt vmcnt(0)
231 ; GFX10-NEXT: ; return to shader part epilog
233 ; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
234 ; GFX11: ; %bb.0: ; %main_body
235 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
236 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
237 ; GFX11-NEXT: s_waitcnt vmcnt(0)
238 ; GFX11-NEXT: ; return to shader part epilog
240 ; GFX12-LABEL: tbuffer_load_voffset_large_24bit:
241 ; GFX12: ; %bb.0: ; %main_body
242 ; GFX12-NEXT: v_mov_b32_e32 v0, 0x800000
243 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:8388604
244 ; GFX12-NEXT: s_wait_loadcnt 0x0
245 ; GFX12-NEXT: ; return to shader part epilog
; offset = 16777212, soffset = 0, format = 63, cache policy = 0.
247 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 63, i32 0)
248 ret <4 x float> %data
; tbuffer_load_immoffs_large: three loads that combine a constant offset with a
; non-zero soffset operand.
;  * Load 1: offset 4095, constant soffset 61, format 47. Targets before GFX12
;    are expected to print the soffset inline as `61`; GFX12 targets are
;    expected to first materialise it with `s_mov_b32 s5, 61`.
;  * Load 2: offset 73, soffset from the SGPR argument %soffs (s4), format 62.
;  * Load 3: offset 1, soffset from %soffs (s4), format 77.
; In every case the constant offset is expected in the immediate `offset:`
; field with no VGPR offset.
;
; The value names %vdata_glc / %vdata_slc do not reflect the cache-policy
; operand: all three calls pass 0 and the checks show no cache modifiers.
;
; The result aggregate is seeded with poison rather than undef: every field is
; overwritten by the insertvalue chain, so the seed never reaches codegen.
251 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
252 ; PREGFX10-LABEL: tbuffer_load_immoffs_large:
254 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
255 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
256 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
257 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
258 ; PREGFX10-NEXT: ; return to shader part epilog
260 ; GFX10-LABEL: tbuffer_load_immoffs_large:
262 ; GFX10-NEXT: s_clause 0x2
263 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
264 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_UINT] offset:73
265 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] offset:1
266 ; GFX10-NEXT: s_waitcnt vmcnt(0)
267 ; GFX10-NEXT: ; return to shader part epilog
269 ; GFX11-LABEL: tbuffer_load_immoffs_large:
271 ; GFX11-NEXT: s_clause 0x2
272 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
273 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
274 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
275 ; GFX11-NEXT: s_waitcnt vmcnt(0)
276 ; GFX11-NEXT: ; return to shader part epilog
278 ; GFX12-LABEL: tbuffer_load_immoffs_large:
280 ; GFX12-NEXT: s_mov_b32 s5, 61
281 ; GFX12-NEXT: s_clause 0x2
282 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
283 ; GFX12-NEXT: tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
284 ; GFX12-NEXT: tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
285 ; GFX12-NEXT: s_wait_loadcnt 0x0
286 ; GFX12-NEXT: ; return to shader part epilog
; Operands after the resource: offset, soffset, format, cache policy.
287 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 4095, i32 61, i32 47, i32 0)
288 %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 73, i32 %soffs, i32 62, i32 0)
289 %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 1, i32 %soffs, i32 77, i32 0)
290 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
291 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
292 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
; Pack the three results into the return aggregate (v[0:11] in the checks).
293 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
294 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
295 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
296 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; tbuffer_load_ofs: the offset operand is the non-constant VGPR argument
; %voffs. The checks expect it to be passed as v0 with the `offen` modifier and
; no immediate `offset:` field on every tested target.
299 define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
300 ; PREGFX10-LABEL: tbuffer_load_ofs:
301 ; PREGFX10: ; %bb.0: ; %main_body
302 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen
303 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
304 ; PREGFX10-NEXT: ; return to shader part epilog
306 ; GFX10-LABEL: tbuffer_load_ofs:
307 ; GFX10: ; %bb.0: ; %main_body
308 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
309 ; GFX10-NEXT: s_waitcnt vmcnt(0)
310 ; GFX10-NEXT: ; return to shader part epilog
312 ; GFX11-LABEL: tbuffer_load_ofs:
313 ; GFX11: ; %bb.0: ; %main_body
314 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
315 ; GFX11-NEXT: s_waitcnt vmcnt(0)
316 ; GFX11-NEXT: ; return to shader part epilog
318 ; GFX12-LABEL: tbuffer_load_ofs:
319 ; GFX12: ; %bb.0: ; %main_body
320 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen
321 ; GFX12-NEXT: s_wait_loadcnt 0x0
322 ; GFX12-NEXT: ; return to shader part epilog
; offset = %voffs, soffset = 0, format = 78, cache policy = 0.
324 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %voffs, i32 0, i32 78, i32 0)
325 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
326 ret <4 x float> %vdata.f
; tbuffer_load_ofs_imm: the offset operand is %voffs + 52. The checks expect
; the add to be folded into the instruction: the variable part stays in v0 with
; `offen`, and the constant becomes the immediate `offset:52`, on every tested
; target (no separate add instruction appears in the expected output).
329 define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
330 ; PREGFX10-LABEL: tbuffer_load_ofs_imm:
331 ; PREGFX10: ; %bb.0: ; %main_body
332 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen offset:52
333 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
334 ; PREGFX10-NEXT: ; return to shader part epilog
336 ; GFX10-LABEL: tbuffer_load_ofs_imm:
337 ; GFX10: ; %bb.0: ; %main_body
338 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
339 ; GFX10-NEXT: s_waitcnt vmcnt(0)
340 ; GFX10-NEXT: ; return to shader part epilog
342 ; GFX11-LABEL: tbuffer_load_ofs_imm:
343 ; GFX11: ; %bb.0: ; %main_body
344 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
345 ; GFX11-NEXT: s_waitcnt vmcnt(0)
346 ; GFX11-NEXT: ; return to shader part epilog
348 ; GFX12-LABEL: tbuffer_load_ofs_imm:
349 ; GFX12: ; %bb.0: ; %main_body
350 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen offset:52
351 ; GFX12-NEXT: s_wait_loadcnt 0x0
352 ; GFX12-NEXT: ; return to shader part epilog
; Variable base plus constant 52; the sum is passed as the offset operand.
354 %ofs = add i32 %voffs, 52
355 %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %ofs, i32 0, i32 78, i32 0)
356 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
357 ret <4 x float> %vdata.f
; buffer_load_xy: two-component result via the v2i32 overload. The checks
; expect the `tbuffer_load_format_xy` opcode writing v[0:1] on every tested
; target, with format 77 printed symbolically before GFX11 and numerically on
; GFX11 and GFX12 targets.
; NOTE(review): named buffer_load_* although it calls the tbuffer intrinsic;
; renaming would require regenerating the check lines.
360 define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
361 ; PREGFX10-LABEL: buffer_load_xy:
363 ; PREGFX10-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
364 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
365 ; PREGFX10-NEXT: ; return to shader part epilog
367 ; GFX10-LABEL: buffer_load_xy:
369 ; GFX10-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
370 ; GFX10-NEXT: s_waitcnt vmcnt(0)
371 ; GFX10-NEXT: ; return to shader part epilog
373 ; GFX11-LABEL: buffer_load_xy:
375 ; GFX11-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:77
376 ; GFX11-NEXT: s_waitcnt vmcnt(0)
377 ; GFX11-NEXT: ; return to shader part epilog
379 ; GFX12-LABEL: buffer_load_xy:
381 ; GFX12-NEXT: tbuffer_load_format_xy v[0:1], off, s[0:3], null format:77
382 ; GFX12-NEXT: s_wait_loadcnt 0x0
383 ; GFX12-NEXT: ; return to shader part epilog
; offset = 0, soffset = 0, format = 77, cache policy = 0.
384 %vdata = call <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
385 %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
386 ret <2 x float> %vdata.f
; buffer_load_x: single-component result via the scalar i32 overload. The
; checks expect the `tbuffer_load_format_x` opcode writing v0 on every tested
; target, with format 77 printed symbolically before GFX11 and numerically on
; GFX11 and GFX12 targets.
; NOTE(review): named buffer_load_* although it calls the tbuffer intrinsic;
; renaming would require regenerating the check lines.
389 define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
390 ; PREGFX10-LABEL: buffer_load_x:
392 ; PREGFX10-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
393 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
394 ; PREGFX10-NEXT: ; return to shader part epilog
396 ; GFX10-LABEL: buffer_load_x:
398 ; GFX10-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
399 ; GFX10-NEXT: s_waitcnt vmcnt(0)
400 ; GFX10-NEXT: ; return to shader part epilog
402 ; GFX11-LABEL: buffer_load_x:
404 ; GFX11-NEXT: tbuffer_load_format_x v0, off, s[0:3], 0 format:77
405 ; GFX11-NEXT: s_waitcnt vmcnt(0)
406 ; GFX11-NEXT: ; return to shader part epilog
408 ; GFX12-LABEL: buffer_load_x:
410 ; GFX12-NEXT: tbuffer_load_format_x v0, off, s[0:3], null format:77
411 ; GFX12-NEXT: s_wait_loadcnt 0x0
412 ; GFX12-NEXT: ; return to shader part epilog
; offset = 0, soffset = 0, format = 77, cache policy = 0.
413 %vdata = call i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
414 %vdata.f = bitcast i32 %vdata to float
; NOTE(review): no `ret` is visible after this bitcast in the reviewed text.
; The function is declared to return float, so it presumably ends by returning
; %vdata.f -- confirm against the full file.
; Declarations of the raw tbuffer load intrinsic overloads used above (i32,
; v2i32, v4i32 and v4f32 results). As used by the call sites in this file, the
; five operands are, in order:
;   1. <4 x i32> buffer resource (printed as s[0:3] in the checks)
;   2. i32 offset  (becomes the VGPR `offen` operand and/or immediate `offset:`)
;   3. i32 soffset (printed as 0, null, a constant, or an SGPR)
;   4. i32 format  (printed as `format:`)
;   5. i32 cache policy (printed as glc / slc / dlc, or `th:` on GFX12 targets)
418 declare i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32)
419 declare <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32)
420 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32)
421 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)