1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
4 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s
5 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GFX11 %s
6 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
7 ;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; tbuffer_load: issues four struct tbuffer loads from the same inreg resource,
; all with constant-zero index, offset and scalar-offset operands. The calls
; differ in the format immediate (78, 63, 22, 22) and in the final i32 operand
; (0, 1, 2, 5). The expected output shows the format immediate printed either
; symbolically or as a raw number depending on the target, and the final
; operand surfacing as glc / slc / dlc bits (th:TH_LOAD_* on gfx12).
; The first three results are loaded as <4 x i32> and bitcast to <4 x float>;
; the fourth is loaded as <4 x float> directly. All four are returned together.
9 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
10 ; PREGFX10-LABEL: tbuffer_load:
11 ; PREGFX10: ; %bb.0: ; %main_body
12 ; PREGFX10-NEXT: v_mov_b32_e32 v12, 0
13 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
14 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen glc
15 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen slc
16 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen glc
17 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
18 ; PREGFX10-NEXT: ; return to shader part epilog
20 ; GFX10-LABEL: tbuffer_load:
21 ; GFX10: ; %bb.0: ; %main_body
22 ; GFX10-NEXT: v_mov_b32_e32 v16, 0
23 ; GFX10-NEXT: s_clause 0x3
24 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v16, s[0:3], 0 format:78 idxen
25 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], v16, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen glc
26 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
27 ; GFX10-NEXT: tbuffer_load_format_xyzw v[12:15], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
28 ; GFX10-NEXT: s_waitcnt vmcnt(0)
29 ; GFX10-NEXT: ; return to shader part epilog
31 ; GFX11-LABEL: tbuffer_load:
32 ; GFX11: ; %bb.0: ; %main_body
33 ; GFX11-NEXT: v_mov_b32_e32 v12, 0
34 ; GFX11-NEXT: s_clause 0x3
35 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:78 idxen
36 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen glc
37 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
38 ; GFX11-NEXT: tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
39 ; GFX11-NEXT: s_waitcnt vmcnt(0)
40 ; GFX11-NEXT: ; return to shader part epilog
42 ; GFX12-LABEL: tbuffer_load:
43 ; GFX12: ; %bb.0: ; %main_body
44 ; GFX12-NEXT: v_mov_b32_e32 v12, 0
45 ; GFX12-NEXT: s_clause 0x3
46 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v12, s[0:3], null format:78 idxen
47 ; GFX12-NEXT: tbuffer_load_format_xyzw v[4:7], v12, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen th:TH_LOAD_NT
48 ; GFX12-NEXT: tbuffer_load_format_xyzw v[8:11], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_HT
49 ; GFX12-NEXT: tbuffer_load_format_xyzw v[12:15], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_RT_NT
50 ; GFX12-NEXT: s_wait_loadcnt 0x0
51 ; GFX12-NEXT: ; return to shader part epilog
; Final operand 0: no cache-policy modifier is expected on any target.
53 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0)
; Final operand 1: expected as glc (TH_LOAD_NT on gfx12).
54 %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 63, i32 1)
; Final operand 2: expected as slc (TH_LOAD_HT on gfx12).
55 %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 2)
; Final operand 5: expected as glc only before gfx10, glc dlc on gfx10/gfx11,
; and TH_LOAD_RT_NT on gfx12. This call also uses the <4 x float> overload.
56 %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 5)
57 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
58 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
59 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
; Pack the four results into the returned aggregate, one field per load.
60 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
61 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
62 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
63 %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
64 ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
; tbuffer_load_immoffs: a single load whose third operand is the constant 42.
; The expected output on every target folds that constant into the
; instruction's immediate field (offset:42) and materialises the zero index
; in v0 for the idxen addressing mode.
67 define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
68 ; PREGFX10-LABEL: tbuffer_load_immoffs:
69 ; PREGFX10: ; %bb.0: ; %main_body
70 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0
71 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:42
72 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
73 ; PREGFX10-NEXT: ; return to shader part epilog
75 ; GFX10-LABEL: tbuffer_load_immoffs:
76 ; GFX10: ; %bb.0: ; %main_body
77 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
78 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
79 ; GFX10-NEXT: s_waitcnt vmcnt(0)
80 ; GFX10-NEXT: ; return to shader part epilog
82 ; GFX11-LABEL: tbuffer_load_immoffs:
83 ; GFX11: ; %bb.0: ; %main_body
84 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
85 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
86 ; GFX11-NEXT: s_waitcnt vmcnt(0)
87 ; GFX11-NEXT: ; return to shader part epilog
89 ; GFX12-LABEL: tbuffer_load_immoffs:
90 ; GFX12: ; %bb.0: ; %main_body
91 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
92 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen offset:42
93 ; GFX12-NEXT: s_wait_loadcnt 0x0
94 ; GFX12-NEXT: ; return to shader part epilog
96 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 78, i32 0)
97 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
98 ret <4 x float> %vdata.f
; tbuffer_load_immoffs_large: three loads combining an immediate offset
; (4095, 73, 1) with a scalar offset operand that is either the constant 61
; or the inreg argument %soffs, using format immediates 47, 62 and 77.
; Before gfx12 the expected output encodes 61 inline in the instruction;
; on gfx12 it is first moved into s5 and then used as the scalar offset.
101 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
102 ; PREGFX10-LABEL: tbuffer_load_immoffs_large:
104 ; PREGFX10-NEXT: v_mov_b32_e32 v8, 0
105 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
106 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
107 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
108 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
109 ; PREGFX10-NEXT: ; return to shader part epilog
111 ; GFX10-LABEL: tbuffer_load_immoffs_large:
113 ; GFX10-NEXT: v_mov_b32_e32 v12, 0
114 ; GFX10-NEXT: s_clause 0x2
115 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
116 ; GFX10-NEXT: tbuffer_load_format_xyzw v[4:7], v12, s[0:3], s4 format:[BUF_FMT_32_32_UINT] idxen offset:73
117 ; GFX10-NEXT: tbuffer_load_format_xyzw v[8:11], v12, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:1
118 ; GFX10-NEXT: s_waitcnt vmcnt(0)
119 ; GFX10-NEXT: ; return to shader part epilog
121 ; GFX11-LABEL: tbuffer_load_immoffs_large:
123 ; GFX11-NEXT: v_mov_b32_e32 v8, 0
124 ; GFX11-NEXT: s_clause 0x2
125 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
126 ; GFX11-NEXT: tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
127 ; GFX11-NEXT: tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
128 ; GFX11-NEXT: s_waitcnt vmcnt(0)
129 ; GFX11-NEXT: ; return to shader part epilog
131 ; GFX12-LABEL: tbuffer_load_immoffs_large:
133 ; GFX12-NEXT: v_mov_b32_e32 v8, 0
134 ; GFX12-NEXT: s_mov_b32 s5, 61
135 ; GFX12-NEXT: s_clause 0x2
136 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v8, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
137 ; GFX12-NEXT: tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
138 ; GFX12-NEXT: tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
139 ; GFX12-NEXT: s_wait_loadcnt 0x0
140 ; GFX12-NEXT: ; return to shader part epilog
; Immediate offset 4095 together with a constant scalar offset of 61.
141 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 4095, i32 61, i32 47, i32 0)
; The remaining two loads take the scalar offset from the %soffs argument.
; Despite the _glc/_slc value names, the final operand is 0 in all three calls.
142 %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 73, i32 %soffs, i32 62, i32 0)
143 %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 1, i32 %soffs, i32 77, i32 0)
144 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
145 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
146 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
147 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
148 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
149 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
150 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; tbuffer_load_idx: the index operand comes from the VGPR argument %vindex
; while the offset operands are zero. The expected output uses v0 directly as
; the idxen address, so no register needs to be materialised first.
153 define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
154 ; PREGFX10-LABEL: tbuffer_load_idx:
155 ; PREGFX10: ; %bb.0: ; %main_body
156 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
157 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
158 ; PREGFX10-NEXT: ; return to shader part epilog
160 ; GFX10-LABEL: tbuffer_load_idx:
161 ; GFX10: ; %bb.0: ; %main_body
162 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
163 ; GFX10-NEXT: s_waitcnt vmcnt(0)
164 ; GFX10-NEXT: ; return to shader part epilog
166 ; GFX11-LABEL: tbuffer_load_idx:
167 ; GFX11: ; %bb.0: ; %main_body
168 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
169 ; GFX11-NEXT: s_waitcnt vmcnt(0)
170 ; GFX11-NEXT: ; return to shader part epilog
172 ; GFX12-LABEL: tbuffer_load_idx:
173 ; GFX12: ; %bb.0: ; %main_body
174 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen
175 ; GFX12-NEXT: s_wait_loadcnt 0x0
176 ; GFX12-NEXT: ; return to shader part epilog
178 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0)
179 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
180 ret <4 x float> %vdata.f
; tbuffer_load_ofs: the offset operand comes from the VGPR argument %voffs and
; the index operand is the constant 0. The expected output builds a two-register
; address v[0:1] (zero index in v0, the incoming offset copied to v1) and
; selects the idxen offen addressing mode.
183 define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
184 ; PREGFX10-LABEL: tbuffer_load_ofs:
185 ; PREGFX10: ; %bb.0: ; %main_body
186 ; PREGFX10-NEXT: s_mov_b32 s4, 0
187 ; PREGFX10-NEXT: v_mov_b32_e32 v1, v0
188 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
189 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
190 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
191 ; PREGFX10-NEXT: ; return to shader part epilog
193 ; GFX10-LABEL: tbuffer_load_ofs:
194 ; GFX10: ; %bb.0: ; %main_body
195 ; GFX10-NEXT: s_mov_b32 s4, 0
196 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
197 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
198 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
199 ; GFX10-NEXT: s_waitcnt vmcnt(0)
200 ; GFX10-NEXT: ; return to shader part epilog
202 ; GFX11-LABEL: tbuffer_load_ofs:
203 ; GFX11: ; %bb.0: ; %main_body
204 ; GFX11-NEXT: s_mov_b32 s4, 0
205 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
206 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
207 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
208 ; GFX11-NEXT: s_waitcnt vmcnt(0)
209 ; GFX11-NEXT: ; return to shader part epilog
211 ; GFX12-LABEL: tbuffer_load_ofs:
212 ; GFX12: ; %bb.0: ; %main_body
213 ; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
214 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
215 ; GFX12-NEXT: s_wait_loadcnt 0x0
216 ; GFX12-NEXT: ; return to shader part epilog
218 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0)
219 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
220 ret <4 x float> %vdata.f
; tbuffer_load_ofs_imm: the offset operand is %voffs + 52. The expected output
; keeps the variable part in v1 and folds the constant addend into the
; instruction's immediate field (offset:52), so no separate add is expected.
223 define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
224 ; PREGFX10-LABEL: tbuffer_load_ofs_imm:
225 ; PREGFX10: ; %bb.0: ; %main_body
226 ; PREGFX10-NEXT: s_mov_b32 s4, 0
227 ; PREGFX10-NEXT: v_mov_b32_e32 v1, v0
228 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
229 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen offset:52
230 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
231 ; PREGFX10-NEXT: ; return to shader part epilog
233 ; GFX10-LABEL: tbuffer_load_ofs_imm:
234 ; GFX10: ; %bb.0: ; %main_body
235 ; GFX10-NEXT: s_mov_b32 s4, 0
236 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
237 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
238 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
239 ; GFX10-NEXT: s_waitcnt vmcnt(0)
240 ; GFX10-NEXT: ; return to shader part epilog
242 ; GFX11-LABEL: tbuffer_load_ofs_imm:
243 ; GFX11: ; %bb.0: ; %main_body
244 ; GFX11-NEXT: s_mov_b32 s4, 0
245 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
246 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
247 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
248 ; GFX11-NEXT: s_waitcnt vmcnt(0)
249 ; GFX11-NEXT: ; return to shader part epilog
251 ; GFX12-LABEL: tbuffer_load_ofs_imm:
252 ; GFX12: ; %bb.0: ; %main_body
253 ; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
254 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen offset:52
255 ; GFX12-NEXT: s_wait_loadcnt 0x0
256 ; GFX12-NEXT: ; return to shader part epilog
; Variable offset plus a small constant; the constant is what the expected
; output folds into the immediate offset field.
258 %ofs = add i32 %voffs, 52
259 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %ofs, i32 0, i32 78, i32 0)
260 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
261 ret <4 x float> %vdata.f
; tbuffer_load_both: both the index (%vindex) and the offset (%voffs) come
; from VGPR arguments. The expected output uses the incoming pair v[0:1]
; directly with idxen offen, with no preparatory moves.
264 define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
265 ; PREGFX10-LABEL: tbuffer_load_both:
266 ; PREGFX10: ; %bb.0: ; %main_body
267 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
268 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
269 ; PREGFX10-NEXT: ; return to shader part epilog
271 ; GFX10-LABEL: tbuffer_load_both:
272 ; GFX10: ; %bb.0: ; %main_body
273 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
274 ; GFX10-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10-NEXT: ; return to shader part epilog
277 ; GFX11-LABEL: tbuffer_load_both:
278 ; GFX11: ; %bb.0: ; %main_body
279 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
280 ; GFX11-NEXT: s_waitcnt vmcnt(0)
281 ; GFX11-NEXT: ; return to shader part epilog
283 ; GFX12-LABEL: tbuffer_load_both:
284 ; GFX12: ; %bb.0: ; %main_body
285 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
286 ; GFX12-NEXT: s_wait_loadcnt 0x0
287 ; GFX12-NEXT: ; return to shader part epilog
289 %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0)
290 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
291 ret <4 x float> %vdata.f
; buffer_load_xy: exercises the two-component (<2 x i32>) overload of the
; intrinsic with format immediate 77. The expected output selects the
; tbuffer_load_format_xy form writing the register pair v[0:1].
294 define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
295 ; PREGFX10-LABEL: buffer_load_xy:
297 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0
298 ; PREGFX10-NEXT: tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
299 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
300 ; PREGFX10-NEXT: ; return to shader part epilog
302 ; GFX10-LABEL: buffer_load_xy:
304 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
305 ; GFX10-NEXT: tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
306 ; GFX10-NEXT: s_waitcnt vmcnt(0)
307 ; GFX10-NEXT: ; return to shader part epilog
309 ; GFX11-LABEL: buffer_load_xy:
311 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
312 ; GFX11-NEXT: tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:77 idxen
313 ; GFX11-NEXT: s_waitcnt vmcnt(0)
314 ; GFX11-NEXT: ; return to shader part epilog
316 ; GFX12-LABEL: buffer_load_xy:
318 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
319 ; GFX12-NEXT: tbuffer_load_format_xy v[0:1], v0, s[0:3], null format:77 idxen
320 ; GFX12-NEXT: s_wait_loadcnt 0x0
321 ; GFX12-NEXT: ; return to shader part epilog
322 %vdata = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
323 %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
324 ret <2 x float> %vdata.f
; buffer_load_x: exercises the scalar (i32) overload of the intrinsic with
; format immediate 77. The expected output selects the single-component
; tbuffer_load_format_x form writing v0.
; NOTE(review): no `ret` instruction is visible for this function in this view,
; although it is declared to return float and the sibling buffer_load_xy ends
; with an explicit ret of its bitcast result. Confirm that the terminator
; (presumably returning %vdata.f) is present in the full file.
327 define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
328 ; PREGFX10-LABEL: buffer_load_x:
330 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0
331 ; PREGFX10-NEXT: tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
332 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
333 ; PREGFX10-NEXT: ; return to shader part epilog
335 ; GFX10-LABEL: buffer_load_x:
337 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
338 ; GFX10-NEXT: tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
339 ; GFX10-NEXT: s_waitcnt vmcnt(0)
340 ; GFX10-NEXT: ; return to shader part epilog
342 ; GFX11-LABEL: buffer_load_x:
344 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
345 ; GFX11-NEXT: tbuffer_load_format_x v0, v0, s[0:3], 0 format:77 idxen
346 ; GFX11-NEXT: s_waitcnt vmcnt(0)
347 ; GFX11-NEXT: ; return to shader part epilog
349 ; GFX12-LABEL: buffer_load_x:
351 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
352 ; GFX12-NEXT: tbuffer_load_format_x v0, v0, s[0:3], null format:77 idxen
353 ; GFX12-NEXT: s_wait_loadcnt 0x0
354 ; GFX12-NEXT: ; return to shader part epilog
355 %vdata = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
356 %vdata.f = bitcast i32 %vdata to float
; buffer_load_voffset_large_12bit: constant offset 4092. On every tested target
; the expected output encodes the whole value in the instruction's immediate
; field (offset:4092) with only the zero index in a VGPR.
360 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
361 ; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
362 ; PREGFX10: ; %bb.0: ; %main_body
363 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0
364 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
365 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
366 ; PREGFX10-NEXT: ; return to shader part epilog
368 ; GFX10-LABEL: buffer_load_voffset_large_12bit:
369 ; GFX10: ; %bb.0: ; %main_body
370 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
371 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
372 ; GFX10-NEXT: s_waitcnt vmcnt(0)
373 ; GFX10-NEXT: ; return to shader part epilog
375 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
376 ; GFX11: ; %bb.0: ; %main_body
377 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
378 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
379 ; GFX11-NEXT: s_waitcnt vmcnt(0)
380 ; GFX11-NEXT: ; return to shader part epilog
382 ; GFX12-LABEL: buffer_load_voffset_large_12bit:
383 ; GFX12: ; %bb.0: ; %main_body
384 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
385 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
386 ; GFX12-NEXT: s_wait_loadcnt 0x0
387 ; GFX12-NEXT: ; return to shader part epilog
389 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
390 ret <4 x float> %data
; tbuffer_load_voffset_large_13bit: constant offset 8188. Before gfx12 the
; expected output splits it into 0x1000 held in v1 plus offset:4092 and uses
; idxen offen; on gfx12 the whole value is encoded as offset:8188 with idxen only.
393 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
394 ; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
395 ; PREGFX10: ; %bb.0: ; %main_body
396 ; PREGFX10-NEXT: s_mov_b32 s4, 0
397 ; PREGFX10-NEXT: v_mov_b32_e32 v1, 0x1000
398 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
399 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
400 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
401 ; PREGFX10-NEXT: ; return to shader part epilog
403 ; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
404 ; GFX10: ; %bb.0: ; %main_body
405 ; GFX10-NEXT: s_mov_b32 s4, 0
406 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
407 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
408 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
409 ; GFX10-NEXT: s_waitcnt vmcnt(0)
410 ; GFX10-NEXT: ; return to shader part epilog
412 ; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
413 ; GFX11: ; %bb.0: ; %main_body
414 ; GFX11-NEXT: s_mov_b32 s4, 0
415 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
416 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
417 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
418 ; GFX11-NEXT: s_waitcnt vmcnt(0)
419 ; GFX11-NEXT: ; return to shader part epilog
421 ; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
422 ; GFX12: ; %bb.0: ; %main_body
423 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
424 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8188
425 ; GFX12-NEXT: s_wait_loadcnt 0x0
426 ; GFX12-NEXT: ; return to shader part epilog
428 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
429 ret <4 x float> %data
; tbuffer_load_voffset_large_16bit: constant offset 65532. Before gfx12 the
; expected output splits it into 0xf000 held in v1 plus offset:4092; on gfx12
; the whole value is encoded as offset:65532.
432 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
433 ; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
434 ; PREGFX10: ; %bb.0: ; %main_body
435 ; PREGFX10-NEXT: s_mov_b32 s4, 0
436 ; PREGFX10-NEXT: v_mov_b32_e32 v1, 0xf000
437 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
438 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
439 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
440 ; PREGFX10-NEXT: ; return to shader part epilog
442 ; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
443 ; GFX10: ; %bb.0: ; %main_body
444 ; GFX10-NEXT: s_mov_b32 s4, 0
445 ; GFX10-NEXT: v_mov_b32_e32 v1, 0xf000
446 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
447 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
448 ; GFX10-NEXT: s_waitcnt vmcnt(0)
449 ; GFX10-NEXT: ; return to shader part epilog
451 ; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
452 ; GFX11: ; %bb.0: ; %main_body
453 ; GFX11-NEXT: s_mov_b32 s4, 0
454 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
455 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
456 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
457 ; GFX11-NEXT: s_waitcnt vmcnt(0)
458 ; GFX11-NEXT: ; return to shader part epilog
460 ; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
461 ; GFX12: ; %bb.0: ; %main_body
462 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
463 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:65532
464 ; GFX12-NEXT: s_wait_loadcnt 0x0
465 ; GFX12-NEXT: ; return to shader part epilog
467 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
468 ret <4 x float> %data
; tbuffer_load_voffset_large_23bit: constant offset 8388604. Before gfx12 the
; expected output splits it into 0x7ff000 held in v1 plus offset:4092; on gfx12
; the whole value is still encoded as a single immediate (offset:8388604).
471 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
472 ; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
473 ; PREGFX10: ; %bb.0: ; %main_body
474 ; PREGFX10-NEXT: s_mov_b32 s4, 0
475 ; PREGFX10-NEXT: v_mov_b32_e32 v1, 0x7ff000
476 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
477 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
478 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
479 ; PREGFX10-NEXT: ; return to shader part epilog
481 ; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
482 ; GFX10: ; %bb.0: ; %main_body
483 ; GFX10-NEXT: s_mov_b32 s4, 0
484 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x7ff000
485 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
486 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
487 ; GFX10-NEXT: s_waitcnt vmcnt(0)
488 ; GFX10-NEXT: ; return to shader part epilog
490 ; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
491 ; GFX11: ; %bb.0: ; %main_body
492 ; GFX11-NEXT: s_mov_b32 s4, 0
493 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
494 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
495 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
496 ; GFX11-NEXT: s_waitcnt vmcnt(0)
497 ; GFX11-NEXT: ; return to shader part epilog
499 ; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
500 ; GFX12: ; %bb.0: ; %main_body
501 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
502 ; GFX12-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8388604
503 ; GFX12-NEXT: s_wait_loadcnt 0x0
504 ; GFX12-NEXT: ; return to shader part epilog
506 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
507 ret <4 x float> %data
; tbuffer_load_voffset_large_24bit: constant offset 16777212. Before gfx12 the
; expected output splits it into 0xfff000 held in v1 plus offset:4092. On gfx12
; it no longer fits the immediate alone: the expected output puts 0x800000 in
; v1 and keeps offset:8388604, using idxen offen. This is the only function
; here with separate SelectionDAG and GlobalISel expectations for gfx12; the
; two differ only in the operand order of the v_dual_mov_b32 instruction.
510 define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
511 ; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
512 ; PREGFX10: ; %bb.0: ; %main_body
513 ; PREGFX10-NEXT: s_mov_b32 s4, 0
514 ; PREGFX10-NEXT: v_mov_b32_e32 v1, 0xfff000
515 ; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
516 ; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
517 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
518 ; PREGFX10-NEXT: ; return to shader part epilog
520 ; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
521 ; GFX10: ; %bb.0: ; %main_body
522 ; GFX10-NEXT: s_mov_b32 s4, 0
523 ; GFX10-NEXT: v_mov_b32_e32 v1, 0xfff000
524 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
525 ; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
526 ; GFX10-NEXT: s_waitcnt vmcnt(0)
527 ; GFX10-NEXT: ; return to shader part epilog
529 ; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
530 ; GFX11: ; %bb.0: ; %main_body
531 ; GFX11-NEXT: s_mov_b32 s4, 0
532 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
533 ; GFX11-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
534 ; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
535 ; GFX11-NEXT: s_waitcnt vmcnt(0)
536 ; GFX11-NEXT: ; return to shader part epilog
538 ; GFX12-SDAG-LABEL: tbuffer_load_voffset_large_24bit:
539 ; GFX12-SDAG: ; %bb.0: ; %main_body
540 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 0x800000 :: v_dual_mov_b32 v0, 0
541 ; GFX12-SDAG-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
542 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
543 ; GFX12-SDAG-NEXT: ; return to shader part epilog
545 ; GFX12-GISEL-LABEL: tbuffer_load_voffset_large_24bit:
546 ; GFX12-GISEL: ; %bb.0: ; %main_body
547 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x800000
548 ; GFX12-GISEL-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
549 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
550 ; GFX12-GISEL-NEXT: ; return to shader part epilog
552 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
553 ret <4 x float> %data
; Declarations of the intrinsic overloads called by the functions in this file.
; Each takes a <4 x i32> followed by five i32 operands; as used by the callers
; here, those are (in order) the resource, index, offset, scalar offset, format
; immediate and a final flags operand. Only the return type differs per overload.
556 declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
557 declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
558 declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)
559 declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32)