1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
; Baseline case: image load from a 2D MSAA array image with four i16
; coordinates (%s, %t, %slice, %fragid) and no extra result dword.
;
; What the expected output below pins down:
;   * the four i16 coordinates are packed pairwise into two 32-bit VGPRs
;     (v_and_b32 with 0xffff, then v_lshl_or_b32 by 16);
;   * the inreg <8 x i32> %rsrc arrives in s2..s9 and is copied down to s[0:7];
;   * a single image_load with dmask:0xf and the a16 modifier is emitted.
; The gfx900 lines spell the image kind with "da"; the gfx10+/gfx12 lines use
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY. The gfx12 lines list the address registers
; as [v0, v1], omit unorm, and wait with s_wait_loadcnt instead of s_waitcnt.
7 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
8 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
10 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
11 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
12 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
13 ; GFX9-NEXT: s_mov_b32 s0, s2
14 ; GFX9-NEXT: s_mov_b32 s1, s3
15 ; GFX9-NEXT: s_mov_b32 s2, s4
16 ; GFX9-NEXT: s_mov_b32 s3, s5
17 ; GFX9-NEXT: s_mov_b32 s4, s6
18 ; GFX9-NEXT: s_mov_b32 s5, s7
19 ; GFX9-NEXT: s_mov_b32 s6, s8
20 ; GFX9-NEXT: s_mov_b32 s7, s9
21 ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
22 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
23 ; GFX9-NEXT: s_waitcnt vmcnt(0)
24 ; GFX9-NEXT: ; return to shader part epilog
26 ; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
28 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
29 ; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0xffff, v2
30 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
31 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
32 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
33 ; GFX10PLUS-NEXT: v_lshl_or_b32 v0, v1, 16, v0
34 ; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v3, 16, v2
35 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
36 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
37 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
38 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
39 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
40 ; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
41 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
42 ; GFX10PLUS-NEXT: ; return to shader part epilog
44 ; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw:
46 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
47 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
48 ; GFX12-NEXT: s_mov_b32 s0, s2
49 ; GFX12-NEXT: s_mov_b32 s1, s3
50 ; GFX12-NEXT: s_mov_b32 s2, s4
51 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
52 ; GFX12-NEXT: v_lshl_or_b32 v1, v3, 16, v2
53 ; GFX12-NEXT: s_mov_b32 s3, s5
54 ; GFX12-NEXT: s_mov_b32 s4, s6
55 ; GFX12-NEXT: s_mov_b32 s5, s7
56 ; GFX12-NEXT: s_mov_b32 s6, s8
57 ; GFX12-NEXT: s_mov_b32 s7, s9
58 ; GFX12-NEXT: image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16
59 ; GFX12-NEXT: s_wait_loadcnt 0x0
60 ; GFX12-NEXT: ; return to shader part epilog
; First immediate is 15 (matches dmask:0xf in the expected image_load); the two
; trailing immediates are both 0, and no tfe/lwe modifier is expected above.
61 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; Variant whose intrinsic returns { <4 x float>, i32 }: the vector part is
; returned from the shader and the extra i32 is stored to %out.
;
; What the expected output below pins down:
;   * the i16 coordinates are packed into v10/v11, leaving v[0:4] free for the
;     five-dword result;
;   * v5 is set to 0 and copied into v0..v4 before the load, so the whole
;     result tuple is zero-initialized;
;   * image_load writes v[0:4] and carries the tfe modifier;
;   * after the wait, the fifth dword (v4) is stored through s[10:11] (the
;     inreg %out pointer) with v5 (still 0) as the offset register.
; Only the gfx900 lines expect a second s_waitcnt vmcnt(0) after the store.
65 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
66 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
68 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
69 ; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
70 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
71 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
72 ; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
73 ; GFX9-NEXT: v_mov_b32_e32 v6, v5
74 ; GFX9-NEXT: v_mov_b32_e32 v7, v5
75 ; GFX9-NEXT: v_mov_b32_e32 v8, v5
76 ; GFX9-NEXT: v_mov_b32_e32 v9, v5
77 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
78 ; GFX9-NEXT: s_mov_b32 s0, s2
79 ; GFX9-NEXT: s_mov_b32 s1, s3
80 ; GFX9-NEXT: s_mov_b32 s2, s4
81 ; GFX9-NEXT: s_mov_b32 s3, s5
82 ; GFX9-NEXT: s_mov_b32 s4, s6
83 ; GFX9-NEXT: s_mov_b32 s5, s7
84 ; GFX9-NEXT: s_mov_b32 s6, s8
85 ; GFX9-NEXT: s_mov_b32 s7, s9
86 ; GFX9-NEXT: v_mov_b32_e32 v1, v6
87 ; GFX9-NEXT: v_mov_b32_e32 v2, v7
88 ; GFX9-NEXT: v_mov_b32_e32 v3, v8
89 ; GFX9-NEXT: v_mov_b32_e32 v4, v9
90 ; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe da
91 ; GFX9-NEXT: s_waitcnt vmcnt(0)
92 ; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
93 ; GFX9-NEXT: s_waitcnt vmcnt(0)
94 ; GFX9-NEXT: ; return to shader part epilog
96 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
98 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
99 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
100 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
101 ; GFX10-NEXT: s_mov_b32 s0, s2
102 ; GFX10-NEXT: s_mov_b32 s1, s3
103 ; GFX10-NEXT: v_mov_b32_e32 v6, v5
104 ; GFX10-NEXT: v_mov_b32_e32 v7, v5
105 ; GFX10-NEXT: v_mov_b32_e32 v8, v5
106 ; GFX10-NEXT: v_mov_b32_e32 v9, v5
107 ; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
108 ; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
109 ; GFX10-NEXT: s_mov_b32 s2, s4
110 ; GFX10-NEXT: s_mov_b32 s3, s5
111 ; GFX10-NEXT: s_mov_b32 s4, s6
112 ; GFX10-NEXT: s_mov_b32 s5, s7
113 ; GFX10-NEXT: s_mov_b32 s6, s8
114 ; GFX10-NEXT: s_mov_b32 s7, s9
115 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
116 ; GFX10-NEXT: v_mov_b32_e32 v1, v6
117 ; GFX10-NEXT: v_mov_b32_e32 v2, v7
118 ; GFX10-NEXT: v_mov_b32_e32 v3, v8
119 ; GFX10-NEXT: v_mov_b32_e32 v4, v9
120 ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
121 ; GFX10-NEXT: s_waitcnt vmcnt(0)
122 ; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
123 ; GFX10-NEXT: ; return to shader part epilog
125 ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
127 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
128 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
129 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
130 ; GFX11-NEXT: s_mov_b32 s0, s2
131 ; GFX11-NEXT: s_mov_b32 s1, s3
132 ; GFX11-NEXT: v_mov_b32_e32 v6, v5
133 ; GFX11-NEXT: v_mov_b32_e32 v7, v5
134 ; GFX11-NEXT: v_mov_b32_e32 v8, v5
135 ; GFX11-NEXT: v_mov_b32_e32 v9, v5
136 ; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
137 ; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
138 ; GFX11-NEXT: s_mov_b32 s2, s4
139 ; GFX11-NEXT: s_mov_b32 s3, s5
140 ; GFX11-NEXT: s_mov_b32 s4, s6
141 ; GFX11-NEXT: s_mov_b32 s5, s7
142 ; GFX11-NEXT: s_mov_b32 s6, s8
143 ; GFX11-NEXT: s_mov_b32 s7, s9
144 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
145 ; GFX11-NEXT: v_mov_b32_e32 v1, v6
146 ; GFX11-NEXT: v_mov_b32_e32 v2, v7
147 ; GFX11-NEXT: v_mov_b32_e32 v3, v8
148 ; GFX11-NEXT: v_mov_b32_e32 v4, v9
149 ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
150 ; GFX11-NEXT: s_waitcnt vmcnt(0)
151 ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
152 ; GFX11-NEXT: ; return to shader part epilog
154 ; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
156 ; GFX12-NEXT: v_mov_b32_e32 v5, 0
157 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
158 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
159 ; GFX12-NEXT: s_mov_b32 s0, s2
160 ; GFX12-NEXT: s_mov_b32 s1, s3
161 ; GFX12-NEXT: v_mov_b32_e32 v6, v5
162 ; GFX12-NEXT: v_mov_b32_e32 v7, v5
163 ; GFX12-NEXT: v_mov_b32_e32 v8, v5
164 ; GFX12-NEXT: v_mov_b32_e32 v9, v5
165 ; GFX12-NEXT: v_lshl_or_b32 v10, v1, 16, v0
166 ; GFX12-NEXT: v_lshl_or_b32 v11, v3, 16, v2
167 ; GFX12-NEXT: s_mov_b32 s2, s4
168 ; GFX12-NEXT: s_mov_b32 s3, s5
169 ; GFX12-NEXT: s_mov_b32 s4, s6
170 ; GFX12-NEXT: s_mov_b32 s5, s7
171 ; GFX12-NEXT: s_mov_b32 s6, s8
172 ; GFX12-NEXT: s_mov_b32 s7, s9
173 ; GFX12-NEXT: v_mov_b32_e32 v0, v5
174 ; GFX12-NEXT: v_mov_b32_e32 v1, v6
175 ; GFX12-NEXT: v_mov_b32_e32 v2, v7
176 ; GFX12-NEXT: v_mov_b32_e32 v3, v8
177 ; GFX12-NEXT: v_mov_b32_e32 v4, v9
178 ; GFX12-NEXT: image_load v[0:4], [v10, v11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 tfe
179 ; GFX12-NEXT: s_wait_loadcnt 0x0
180 ; GFX12-NEXT: global_store_b32 v5, v4, s[10:11]
181 ; GFX12-NEXT: ; return to shader part epilog
; Second-to-last immediate is 1 here (0 in the baseline test); this is the
; only IR difference that accompanies the tfe modifier expected above.
; Presumably bit 0 of that operand requests TFE - TODO confirm against the
; AMDGPU image intrinsic documentation.
182 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Split the aggregate: element 0 is the loaded texel, element 1 is the extra
; i32 that gets written to %out.
183 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
184 %v.err = extractvalue { <4 x float>, i32 } %v, 1
185 store i32 %v.err, ptr addrspace(1) %out, align 4
186 ret <4 x float> %v.vec
; Same shape as the tfe test, but the second-to-last immediate of the
; intrinsic call is 3 instead of 1.
;
; What the expected output below pins down:
;   * identical coordinate packing (v10/v11), zero-initialization of v[0:4]
;     from v5, and store of v4 through s[10:11] as in the tfe test;
;   * on the gfx900, gfx1010 and gfx1100 check lines the image_load carries
;     both "tfe" and "lwe";
;   * on the gfx1200 check lines the image_load carries only "tfe".
;     Presumably that target has no lwe modifier to print - TODO confirm; the
;     expectation is generated from actual llc output, so do not hand-edit it.
189 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
190 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
192 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
193 ; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
194 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
195 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
196 ; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
197 ; GFX9-NEXT: v_mov_b32_e32 v6, v5
198 ; GFX9-NEXT: v_mov_b32_e32 v7, v5
199 ; GFX9-NEXT: v_mov_b32_e32 v8, v5
200 ; GFX9-NEXT: v_mov_b32_e32 v9, v5
201 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
202 ; GFX9-NEXT: s_mov_b32 s0, s2
203 ; GFX9-NEXT: s_mov_b32 s1, s3
204 ; GFX9-NEXT: s_mov_b32 s2, s4
205 ; GFX9-NEXT: s_mov_b32 s3, s5
206 ; GFX9-NEXT: s_mov_b32 s4, s6
207 ; GFX9-NEXT: s_mov_b32 s5, s7
208 ; GFX9-NEXT: s_mov_b32 s6, s8
209 ; GFX9-NEXT: s_mov_b32 s7, s9
210 ; GFX9-NEXT: v_mov_b32_e32 v1, v6
211 ; GFX9-NEXT: v_mov_b32_e32 v2, v7
212 ; GFX9-NEXT: v_mov_b32_e32 v3, v8
213 ; GFX9-NEXT: v_mov_b32_e32 v4, v9
214 ; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe lwe da
215 ; GFX9-NEXT: s_waitcnt vmcnt(0)
216 ; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
217 ; GFX9-NEXT: s_waitcnt vmcnt(0)
218 ; GFX9-NEXT: ; return to shader part epilog
220 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
222 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
223 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
224 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
225 ; GFX10-NEXT: s_mov_b32 s0, s2
226 ; GFX10-NEXT: s_mov_b32 s1, s3
227 ; GFX10-NEXT: v_mov_b32_e32 v6, v5
228 ; GFX10-NEXT: v_mov_b32_e32 v7, v5
229 ; GFX10-NEXT: v_mov_b32_e32 v8, v5
230 ; GFX10-NEXT: v_mov_b32_e32 v9, v5
231 ; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
232 ; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
233 ; GFX10-NEXT: s_mov_b32 s2, s4
234 ; GFX10-NEXT: s_mov_b32 s3, s5
235 ; GFX10-NEXT: s_mov_b32 s4, s6
236 ; GFX10-NEXT: s_mov_b32 s5, s7
237 ; GFX10-NEXT: s_mov_b32 s6, s8
238 ; GFX10-NEXT: s_mov_b32 s7, s9
239 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
240 ; GFX10-NEXT: v_mov_b32_e32 v1, v6
241 ; GFX10-NEXT: v_mov_b32_e32 v2, v7
242 ; GFX10-NEXT: v_mov_b32_e32 v3, v8
243 ; GFX10-NEXT: v_mov_b32_e32 v4, v9
244 ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
245 ; GFX10-NEXT: s_waitcnt vmcnt(0)
246 ; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
247 ; GFX10-NEXT: ; return to shader part epilog
249 ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
251 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
252 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
253 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
254 ; GFX11-NEXT: s_mov_b32 s0, s2
255 ; GFX11-NEXT: s_mov_b32 s1, s3
256 ; GFX11-NEXT: v_mov_b32_e32 v6, v5
257 ; GFX11-NEXT: v_mov_b32_e32 v7, v5
258 ; GFX11-NEXT: v_mov_b32_e32 v8, v5
259 ; GFX11-NEXT: v_mov_b32_e32 v9, v5
260 ; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
261 ; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
262 ; GFX11-NEXT: s_mov_b32 s2, s4
263 ; GFX11-NEXT: s_mov_b32 s3, s5
264 ; GFX11-NEXT: s_mov_b32 s4, s6
265 ; GFX11-NEXT: s_mov_b32 s5, s7
266 ; GFX11-NEXT: s_mov_b32 s6, s8
267 ; GFX11-NEXT: s_mov_b32 s7, s9
268 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
269 ; GFX11-NEXT: v_mov_b32_e32 v1, v6
270 ; GFX11-NEXT: v_mov_b32_e32 v2, v7
271 ; GFX11-NEXT: v_mov_b32_e32 v3, v8
272 ; GFX11-NEXT: v_mov_b32_e32 v4, v9
273 ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
274 ; GFX11-NEXT: s_waitcnt vmcnt(0)
275 ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
276 ; GFX11-NEXT: ; return to shader part epilog
278 ; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
280 ; GFX12-NEXT: v_mov_b32_e32 v5, 0
281 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
282 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
283 ; GFX12-NEXT: s_mov_b32 s0, s2
284 ; GFX12-NEXT: s_mov_b32 s1, s3
285 ; GFX12-NEXT: v_mov_b32_e32 v6, v5
286 ; GFX12-NEXT: v_mov_b32_e32 v7, v5
287 ; GFX12-NEXT: v_mov_b32_e32 v8, v5
288 ; GFX12-NEXT: v_mov_b32_e32 v9, v5
289 ; GFX12-NEXT: v_lshl_or_b32 v10, v1, 16, v0
290 ; GFX12-NEXT: v_lshl_or_b32 v11, v3, 16, v2
291 ; GFX12-NEXT: s_mov_b32 s2, s4
292 ; GFX12-NEXT: s_mov_b32 s3, s5
293 ; GFX12-NEXT: s_mov_b32 s4, s6
294 ; GFX12-NEXT: s_mov_b32 s5, s7
295 ; GFX12-NEXT: s_mov_b32 s6, s8
296 ; GFX12-NEXT: s_mov_b32 s7, s9
297 ; GFX12-NEXT: v_mov_b32_e32 v0, v5
298 ; GFX12-NEXT: v_mov_b32_e32 v1, v6
299 ; GFX12-NEXT: v_mov_b32_e32 v2, v7
300 ; GFX12-NEXT: v_mov_b32_e32 v3, v8
301 ; GFX12-NEXT: v_mov_b32_e32 v4, v9
302 ; GFX12-NEXT: image_load v[0:4], [v10, v11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 tfe
303 ; GFX12-NEXT: s_wait_loadcnt 0x0
304 ; GFX12-NEXT: global_store_b32 v5, v4, s[10:11]
305 ; GFX12-NEXT: ; return to shader part epilog
; Second-to-last immediate is 3 here (1 in the tfe-only test). Presumably
; bit 0 requests TFE and bit 1 requests LWE - TODO confirm against the AMDGPU
; image intrinsic documentation.
306 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
; Split the aggregate: element 0 is the loaded texel, element 1 is the extra
; i32 that gets written to %out.
307 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
308 %v.err = extractvalue { <4 x float>, i32 } %v, 1
309 store i32 %v.err, ptr addrspace(1) %out, align 4
310 ret <4 x float> %v.vec
; Declarations of the two image-load intrinsic overloads called by the tests
; in this file. Both take the same operand list - (i32 immarg, four i16
; coordinates, <8 x i32> resource, i32 immarg, i32 immarg) - and differ only
; in the result type: a plain <4 x float>, or { <4 x float>, i32 } for the
; variant that also yields an extra i32.
313 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
314 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by both declarations above.
316 attributes #0 = { nounwind readonly }