1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
; Test: plain 2D image load returning <4 x float>.
; The call passes 15 as its first immediate (printed as dmask:0xf in the
; expected output) and 0 for both trailing immediates. In every expected
; output the incoming s2..s9 are copied down to s0..s7 to form the resource
; operand, and v0/v1 are used directly as the address operands.
7 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
8 ; GFX6-LABEL: load_2d_v4f32_xyzw:
10 ; GFX6-NEXT: s_mov_b32 s0, s2
11 ; GFX6-NEXT: s_mov_b32 s1, s3
12 ; GFX6-NEXT: s_mov_b32 s2, s4
13 ; GFX6-NEXT: s_mov_b32 s3, s5
14 ; GFX6-NEXT: s_mov_b32 s4, s6
15 ; GFX6-NEXT: s_mov_b32 s5, s7
16 ; GFX6-NEXT: s_mov_b32 s6, s8
17 ; GFX6-NEXT: s_mov_b32 s7, s9
18 ; GFX6-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
19 ; GFX6-NEXT: s_waitcnt vmcnt(0)
20 ; GFX6-NEXT: ; return to shader part epilog
22 ; GFX10PLUS-LABEL: load_2d_v4f32_xyzw:
24 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
25 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
26 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
27 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
28 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
29 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
30 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
31 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
32 ; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
33 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
34 ; GFX10PLUS-NEXT: ; return to shader part epilog
36 ; GFX12-LABEL: load_2d_v4f32_xyzw:
38 ; GFX12-NEXT: s_mov_b32 s0, s2
39 ; GFX12-NEXT: s_mov_b32 s1, s3
40 ; GFX12-NEXT: s_mov_b32 s2, s4
41 ; GFX12-NEXT: s_mov_b32 s3, s5
42 ; GFX12-NEXT: s_mov_b32 s4, s6
43 ; GFX12-NEXT: s_mov_b32 s5, s7
44 ; GFX12-NEXT: s_mov_b32 s6, s8
45 ; GFX12-NEXT: s_mov_b32 s7, s9
46 ; GFX12-NEXT: image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
47 ; GFX12-NEXT: s_wait_loadcnt 0x0
48 ; GFX12-NEXT: ; return to shader part epilog
49 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image load whose intrinsic returns { <4 x float>, i32 }.
; The second-to-last immediate of the call is 1, which shows up as the `tfe`
; modifier on image_load in the expected output. Element 1 of the returned
; struct is stored to %out and element 0 is the function's return value.
; The expected output zero-fills the five result registers v0..v4 before the
; load, moves the incoming coordinates to v5/v6, and stores v4 (the fifth
; result dword) afterwards.
53 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
54 ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe:
56 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
57 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
58 ; GFX6-NEXT: s_mov_b32 s0, s2
59 ; GFX6-NEXT: s_mov_b32 s1, s3
60 ; GFX6-NEXT: s_mov_b32 s2, s4
61 ; GFX6-NEXT: s_mov_b32 s3, s5
62 ; GFX6-NEXT: s_mov_b32 s4, s6
63 ; GFX6-NEXT: s_mov_b32 s5, s7
64 ; GFX6-NEXT: s_mov_b32 s6, s8
65 ; GFX6-NEXT: s_mov_b32 s7, s9
66 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
67 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
68 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
69 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
70 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
71 ; GFX6-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
72 ; GFX6-NEXT: s_mov_b32 s8, s10
73 ; GFX6-NEXT: s_mov_b32 s9, s11
74 ; GFX6-NEXT: s_mov_b32 s10, -1
75 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
76 ; GFX6-NEXT: s_waitcnt vmcnt(0)
77 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
78 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
79 ; GFX6-NEXT: ; return to shader part epilog
81 ; GFX10-LABEL: load_2d_v4f32_xyzw_tfe:
83 ; GFX10-NEXT: v_mov_b32_e32 v7, 0
84 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
85 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
86 ; GFX10-NEXT: s_mov_b32 s0, s2
87 ; GFX10-NEXT: s_mov_b32 s1, s3
88 ; GFX10-NEXT: v_mov_b32_e32 v8, v7
89 ; GFX10-NEXT: v_mov_b32_e32 v9, v7
90 ; GFX10-NEXT: v_mov_b32_e32 v10, v7
91 ; GFX10-NEXT: v_mov_b32_e32 v11, v7
92 ; GFX10-NEXT: s_mov_b32 s2, s4
93 ; GFX10-NEXT: s_mov_b32 s3, s5
94 ; GFX10-NEXT: s_mov_b32 s4, s6
95 ; GFX10-NEXT: s_mov_b32 s5, s7
96 ; GFX10-NEXT: s_mov_b32 s6, s8
97 ; GFX10-NEXT: s_mov_b32 s7, s9
98 ; GFX10-NEXT: v_mov_b32_e32 v0, v7
99 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
100 ; GFX10-NEXT: v_mov_b32_e32 v2, v9
101 ; GFX10-NEXT: v_mov_b32_e32 v3, v10
102 ; GFX10-NEXT: v_mov_b32_e32 v4, v11
103 ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
104 ; GFX10-NEXT: s_waitcnt vmcnt(0)
105 ; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
106 ; GFX10-NEXT: ; return to shader part epilog
108 ; GFX11-LABEL: load_2d_v4f32_xyzw_tfe:
110 ; GFX11-NEXT: v_mov_b32_e32 v7, 0
111 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
112 ; GFX11-NEXT: s_mov_b32 s0, s2
113 ; GFX11-NEXT: s_mov_b32 s1, s3
114 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
115 ; GFX11-NEXT: v_mov_b32_e32 v8, v7
116 ; GFX11-NEXT: v_mov_b32_e32 v9, v7
117 ; GFX11-NEXT: v_mov_b32_e32 v10, v7
118 ; GFX11-NEXT: v_mov_b32_e32 v11, v7
119 ; GFX11-NEXT: s_mov_b32 s2, s4
120 ; GFX11-NEXT: s_mov_b32 s3, s5
121 ; GFX11-NEXT: s_mov_b32 s4, s6
122 ; GFX11-NEXT: s_mov_b32 s5, s7
123 ; GFX11-NEXT: s_mov_b32 s6, s8
124 ; GFX11-NEXT: s_mov_b32 s7, s9
125 ; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
126 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
127 ; GFX11-NEXT: v_mov_b32_e32 v4, v11
128 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
129 ; GFX11-NEXT: s_waitcnt vmcnt(0)
130 ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
131 ; GFX11-NEXT: ; return to shader part epilog
133 ; GFX12-LABEL: load_2d_v4f32_xyzw_tfe:
135 ; GFX12-NEXT: v_mov_b32_e32 v7, 0
136 ; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
137 ; GFX12-NEXT: s_mov_b32 s0, s2
138 ; GFX12-NEXT: s_mov_b32 s1, s3
139 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
140 ; GFX12-NEXT: v_dual_mov_b32 v8, v7 :: v_dual_mov_b32 v9, v7
141 ; GFX12-NEXT: v_dual_mov_b32 v10, v7 :: v_dual_mov_b32 v11, v7
142 ; GFX12-NEXT: s_mov_b32 s2, s4
143 ; GFX12-NEXT: s_mov_b32 s3, s5
144 ; GFX12-NEXT: s_mov_b32 s4, s6
145 ; GFX12-NEXT: s_mov_b32 s5, s7
146 ; GFX12-NEXT: s_mov_b32 s6, s8
147 ; GFX12-NEXT: s_mov_b32 s7, s9
148 ; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
149 ; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
150 ; GFX12-NEXT: v_mov_b32_e32 v4, v11
151 ; GFX12-NEXT: image_load v[0:4], [v5, v6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
152 ; GFX12-NEXT: s_wait_loadcnt 0x0
153 ; GFX12-NEXT: global_store_b32 v7, v4, s[10:11]
154 ; GFX12-NEXT: ; return to shader part epilog
155 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
156 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
157 %v.err = extractvalue { <4 x float>, i32 } %v, 1
158 store i32 %v.err, ptr addrspace(1) %out, align 4
159 ret <4 x float> %v.vec
; Test: same struct-returning 2D image load as the tfe variant, but the
; second-to-last immediate of the call is 3. In the tahiti, gfx1010 and
; gfx1100 expected output this appears as `tfe lwe` on image_load; the
; gfx1200 expected output shows only `tfe`. Element 1 of the returned struct
; is stored to %out and element 0 is the function's return value.
; As in the tfe variant, the expected output zero-fills v0..v4 before the
; load and stores v4 afterwards.
162 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
163 ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
165 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
166 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
167 ; GFX6-NEXT: s_mov_b32 s0, s2
168 ; GFX6-NEXT: s_mov_b32 s1, s3
169 ; GFX6-NEXT: s_mov_b32 s2, s4
170 ; GFX6-NEXT: s_mov_b32 s3, s5
171 ; GFX6-NEXT: s_mov_b32 s4, s6
172 ; GFX6-NEXT: s_mov_b32 s5, s7
173 ; GFX6-NEXT: s_mov_b32 s6, s8
174 ; GFX6-NEXT: s_mov_b32 s7, s9
175 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
176 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
177 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
178 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
179 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
180 ; GFX6-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe lwe
181 ; GFX6-NEXT: s_mov_b32 s8, s10
182 ; GFX6-NEXT: s_mov_b32 s9, s11
183 ; GFX6-NEXT: s_mov_b32 s10, -1
184 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
185 ; GFX6-NEXT: s_waitcnt vmcnt(0)
186 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
187 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
188 ; GFX6-NEXT: ; return to shader part epilog
190 ; GFX10-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
192 ; GFX10-NEXT: v_mov_b32_e32 v7, 0
193 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
194 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
195 ; GFX10-NEXT: s_mov_b32 s0, s2
196 ; GFX10-NEXT: s_mov_b32 s1, s3
197 ; GFX10-NEXT: v_mov_b32_e32 v8, v7
198 ; GFX10-NEXT: v_mov_b32_e32 v9, v7
199 ; GFX10-NEXT: v_mov_b32_e32 v10, v7
200 ; GFX10-NEXT: v_mov_b32_e32 v11, v7
201 ; GFX10-NEXT: s_mov_b32 s2, s4
202 ; GFX10-NEXT: s_mov_b32 s3, s5
203 ; GFX10-NEXT: s_mov_b32 s4, s6
204 ; GFX10-NEXT: s_mov_b32 s5, s7
205 ; GFX10-NEXT: s_mov_b32 s6, s8
206 ; GFX10-NEXT: s_mov_b32 s7, s9
207 ; GFX10-NEXT: v_mov_b32_e32 v0, v7
208 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
209 ; GFX10-NEXT: v_mov_b32_e32 v2, v9
210 ; GFX10-NEXT: v_mov_b32_e32 v3, v10
211 ; GFX10-NEXT: v_mov_b32_e32 v4, v11
212 ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
213 ; GFX10-NEXT: s_waitcnt vmcnt(0)
214 ; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
215 ; GFX10-NEXT: ; return to shader part epilog
217 ; GFX11-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
219 ; GFX11-NEXT: v_mov_b32_e32 v7, 0
220 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
221 ; GFX11-NEXT: s_mov_b32 s0, s2
222 ; GFX11-NEXT: s_mov_b32 s1, s3
223 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
224 ; GFX11-NEXT: v_mov_b32_e32 v8, v7
225 ; GFX11-NEXT: v_mov_b32_e32 v9, v7
226 ; GFX11-NEXT: v_mov_b32_e32 v10, v7
227 ; GFX11-NEXT: v_mov_b32_e32 v11, v7
228 ; GFX11-NEXT: s_mov_b32 s2, s4
229 ; GFX11-NEXT: s_mov_b32 s3, s5
230 ; GFX11-NEXT: s_mov_b32 s4, s6
231 ; GFX11-NEXT: s_mov_b32 s5, s7
232 ; GFX11-NEXT: s_mov_b32 s6, s8
233 ; GFX11-NEXT: s_mov_b32 s7, s9
234 ; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
235 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
236 ; GFX11-NEXT: v_mov_b32_e32 v4, v11
237 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
238 ; GFX11-NEXT: s_waitcnt vmcnt(0)
239 ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
240 ; GFX11-NEXT: ; return to shader part epilog
242 ; GFX12-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
244 ; GFX12-NEXT: v_mov_b32_e32 v7, 0
245 ; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
246 ; GFX12-NEXT: s_mov_b32 s0, s2
247 ; GFX12-NEXT: s_mov_b32 s1, s3
248 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
249 ; GFX12-NEXT: v_dual_mov_b32 v8, v7 :: v_dual_mov_b32 v9, v7
250 ; GFX12-NEXT: v_dual_mov_b32 v10, v7 :: v_dual_mov_b32 v11, v7
251 ; GFX12-NEXT: s_mov_b32 s2, s4
252 ; GFX12-NEXT: s_mov_b32 s3, s5
253 ; GFX12-NEXT: s_mov_b32 s4, s6
254 ; GFX12-NEXT: s_mov_b32 s5, s7
255 ; GFX12-NEXT: s_mov_b32 s6, s8
256 ; GFX12-NEXT: s_mov_b32 s7, s9
257 ; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
258 ; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
259 ; GFX12-NEXT: v_mov_b32_e32 v4, v11
260 ; GFX12-NEXT: image_load v[0:4], [v5, v6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
261 ; GFX12-NEXT: s_wait_loadcnt 0x0
262 ; GFX12-NEXT: global_store_b32 v7, v4, s[10:11]
263 ; GFX12-NEXT: ; return to shader part epilog
264 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0)
265 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
266 %v.err = extractvalue { <4 x float>, i32 } %v, 1
267 store i32 %v.err, ptr addrspace(1) %out, align 4
268 ret <4 x float> %v.vec
; Declarations of the two 2D image-load intrinsic overloads called by the test
; functions in this file: one returning <4 x float>, and one returning
; { <4 x float>, i32 }. Both take the same operand list (three of the operands
; are immarg) and both use attribute group #0, defined below as
; nounwind readonly.
271 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
272 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
274 attributes #0 = { nounwind readonly }