1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Pixel-shader test: loads a <4 x float> via llvm.amdgcn.image.load.2darraymsaa
; using four i16 coordinates (%s, %t, %slice, %fragid) and the inreg <8 x i32>
; descriptor %rsrc.
; What the expectations below pin down:
;  - the four 16-bit coordinates are packed pairwise into v0 and v1
;    (v_and_b32 with 0xffff, then v_lshl_or_b32 by 16);
;  - the descriptor is copied from s[2:9] down to s[0:7];
;  - the first immediate of the call (15) shows up as dmask:0xf, and the
;    image_load carries the a16 modifier;
;  - the gfx900 output uses the `da` modifier, while the sequence shared by the
;    gfx1010 and gfx1100 RUN lines uses dim:SQ_RSRC_IMG_2D_MSAA_ARRAY.
6 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
7 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
9 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
10 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
11 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
12 ; GFX9-NEXT: s_mov_b32 s0, s2
13 ; GFX9-NEXT: s_mov_b32 s1, s3
14 ; GFX9-NEXT: s_mov_b32 s2, s4
15 ; GFX9-NEXT: s_mov_b32 s3, s5
16 ; GFX9-NEXT: s_mov_b32 s4, s6
17 ; GFX9-NEXT: s_mov_b32 s5, s7
18 ; GFX9-NEXT: s_mov_b32 s6, s8
19 ; GFX9-NEXT: s_mov_b32 s7, s9
20 ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
21 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
22 ; GFX9-NEXT: s_waitcnt vmcnt(0)
23 ; GFX9-NEXT: ; return to shader part epilog
25 ; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
27 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
28 ; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0xffff, v2
29 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
30 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
31 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
32 ; GFX10PLUS-NEXT: v_lshl_or_b32 v0, v1, 16, v0
33 ; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v3, 16, v2
34 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
35 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
36 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
37 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
38 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
39 ; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
40 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
41 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Both trailing immediates are 0 in this baseline variant; the expected
; image_load above carries neither tfe nor lwe.
42 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; Variant of the 2darraymsaa A16 load that returns { <4 x float>, i32 }.
; The second-to-last immediate of the intrinsic call is 1, and every expected
; image_load carries the `tfe` modifier and writes five VGPRs, v[0:4].
; What the expectations below pin down:
;  - v5 is set to 0 and copied (via v6..v9) into v0..v4 before the load;
;  - the packed coordinates live in v[10:11];
;  - element 1 of the returned struct is stored to %out: the expected code
;    stores v4 to s[10:11] using v5 (still 0) as the offset register;
;  - gfx900 has an extra s_waitcnt vmcnt(0) after the store;
;  - the gfx1010 and gfx1100 sequences differ only in the store mnemonic
;    (global_store_dword vs global_store_b32).
; NOTE(review): the immediate presumably is the intrinsic's texfailctrl operand
; with bit 0 = TFE -- confirm against the AMDGPU intrinsic documentation.
46 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
47 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
49 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
50 ; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
51 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
52 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
53 ; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
54 ; GFX9-NEXT: v_mov_b32_e32 v6, v5
55 ; GFX9-NEXT: v_mov_b32_e32 v7, v5
56 ; GFX9-NEXT: v_mov_b32_e32 v8, v5
57 ; GFX9-NEXT: v_mov_b32_e32 v9, v5
58 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
59 ; GFX9-NEXT: s_mov_b32 s0, s2
60 ; GFX9-NEXT: s_mov_b32 s1, s3
61 ; GFX9-NEXT: s_mov_b32 s2, s4
62 ; GFX9-NEXT: s_mov_b32 s3, s5
63 ; GFX9-NEXT: s_mov_b32 s4, s6
64 ; GFX9-NEXT: s_mov_b32 s5, s7
65 ; GFX9-NEXT: s_mov_b32 s6, s8
66 ; GFX9-NEXT: s_mov_b32 s7, s9
67 ; GFX9-NEXT: v_mov_b32_e32 v1, v6
68 ; GFX9-NEXT: v_mov_b32_e32 v2, v7
69 ; GFX9-NEXT: v_mov_b32_e32 v3, v8
70 ; GFX9-NEXT: v_mov_b32_e32 v4, v9
71 ; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe da
72 ; GFX9-NEXT: s_waitcnt vmcnt(0)
73 ; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
74 ; GFX9-NEXT: s_waitcnt vmcnt(0)
75 ; GFX9-NEXT: ; return to shader part epilog
77 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
79 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
80 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
81 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
82 ; GFX10-NEXT: s_mov_b32 s0, s2
83 ; GFX10-NEXT: s_mov_b32 s1, s3
84 ; GFX10-NEXT: v_mov_b32_e32 v6, v5
85 ; GFX10-NEXT: v_mov_b32_e32 v7, v5
86 ; GFX10-NEXT: v_mov_b32_e32 v8, v5
87 ; GFX10-NEXT: v_mov_b32_e32 v9, v5
88 ; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
89 ; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
90 ; GFX10-NEXT: s_mov_b32 s2, s4
91 ; GFX10-NEXT: s_mov_b32 s3, s5
92 ; GFX10-NEXT: s_mov_b32 s4, s6
93 ; GFX10-NEXT: s_mov_b32 s5, s7
94 ; GFX10-NEXT: s_mov_b32 s6, s8
95 ; GFX10-NEXT: s_mov_b32 s7, s9
96 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
97 ; GFX10-NEXT: v_mov_b32_e32 v1, v6
98 ; GFX10-NEXT: v_mov_b32_e32 v2, v7
99 ; GFX10-NEXT: v_mov_b32_e32 v3, v8
100 ; GFX10-NEXT: v_mov_b32_e32 v4, v9
101 ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
102 ; GFX10-NEXT: s_waitcnt vmcnt(0)
103 ; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
104 ; GFX10-NEXT: ; return to shader part epilog
106 ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
108 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
109 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
110 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
111 ; GFX11-NEXT: s_mov_b32 s0, s2
112 ; GFX11-NEXT: s_mov_b32 s1, s3
113 ; GFX11-NEXT: v_mov_b32_e32 v6, v5
114 ; GFX11-NEXT: v_mov_b32_e32 v7, v5
115 ; GFX11-NEXT: v_mov_b32_e32 v8, v5
116 ; GFX11-NEXT: v_mov_b32_e32 v9, v5
117 ; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
118 ; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
119 ; GFX11-NEXT: s_mov_b32 s2, s4
120 ; GFX11-NEXT: s_mov_b32 s3, s5
121 ; GFX11-NEXT: s_mov_b32 s4, s6
122 ; GFX11-NEXT: s_mov_b32 s5, s7
123 ; GFX11-NEXT: s_mov_b32 s6, s8
124 ; GFX11-NEXT: s_mov_b32 s7, s9
125 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
126 ; GFX11-NEXT: v_mov_b32_e32 v1, v6
127 ; GFX11-NEXT: v_mov_b32_e32 v2, v7
128 ; GFX11-NEXT: v_mov_b32_e32 v3, v8
129 ; GFX11-NEXT: v_mov_b32_e32 v4, v9
130 ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
131 ; GFX11-NEXT: s_waitcnt vmcnt(0)
132 ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
133 ; GFX11-NEXT: ; return to shader part epilog
; Struct-returning overload; second-to-last immediate is 1 (see `tfe` above).
134 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Element 0 is the texel vector that the shader returns.
135 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
; Element 1 is the extra i32 result; it is written to %out below.
136 %v.err = extractvalue { <4 x float>, i32 } %v, 1
137 store i32 %v.err, ptr addrspace(1) %out, align 4
138 ret <4 x float> %v.vec
; Same shape as the struct-returning 2darraymsaa A16 load, but the
; second-to-last immediate of the intrinsic call is 3. Every expected
; image_load carries both the `tfe` and `lwe` modifiers and writes v[0:4].
; What the expectations below pin down:
;  - v5 is set to 0 and copied (via v6..v9) into v0..v4 before the load;
;  - the packed coordinates live in v[10:11];
;  - element 1 of the returned struct is stored to %out: the expected code
;    stores v4 to s[10:11] using v5 (still 0) as the offset register;
;  - gfx900 has an extra s_waitcnt vmcnt(0) after the store;
;  - the gfx1010 and gfx1100 sequences differ only in the store mnemonic
;    (global_store_dword vs global_store_b32).
; NOTE(review): the immediate presumably is the intrinsic's texfailctrl operand
; with bit 0 = TFE and bit 1 = LWE -- confirm against the AMDGPU intrinsic docs.
141 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
142 ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
144 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
145 ; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
146 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
147 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
148 ; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
149 ; GFX9-NEXT: v_mov_b32_e32 v6, v5
150 ; GFX9-NEXT: v_mov_b32_e32 v7, v5
151 ; GFX9-NEXT: v_mov_b32_e32 v8, v5
152 ; GFX9-NEXT: v_mov_b32_e32 v9, v5
153 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
154 ; GFX9-NEXT: s_mov_b32 s0, s2
155 ; GFX9-NEXT: s_mov_b32 s1, s3
156 ; GFX9-NEXT: s_mov_b32 s2, s4
157 ; GFX9-NEXT: s_mov_b32 s3, s5
158 ; GFX9-NEXT: s_mov_b32 s4, s6
159 ; GFX9-NEXT: s_mov_b32 s5, s7
160 ; GFX9-NEXT: s_mov_b32 s6, s8
161 ; GFX9-NEXT: s_mov_b32 s7, s9
162 ; GFX9-NEXT: v_mov_b32_e32 v1, v6
163 ; GFX9-NEXT: v_mov_b32_e32 v2, v7
164 ; GFX9-NEXT: v_mov_b32_e32 v3, v8
165 ; GFX9-NEXT: v_mov_b32_e32 v4, v9
166 ; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe lwe da
167 ; GFX9-NEXT: s_waitcnt vmcnt(0)
168 ; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
169 ; GFX9-NEXT: s_waitcnt vmcnt(0)
170 ; GFX9-NEXT: ; return to shader part epilog
172 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
174 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
175 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
176 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
177 ; GFX10-NEXT: s_mov_b32 s0, s2
178 ; GFX10-NEXT: s_mov_b32 s1, s3
179 ; GFX10-NEXT: v_mov_b32_e32 v6, v5
180 ; GFX10-NEXT: v_mov_b32_e32 v7, v5
181 ; GFX10-NEXT: v_mov_b32_e32 v8, v5
182 ; GFX10-NEXT: v_mov_b32_e32 v9, v5
183 ; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
184 ; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
185 ; GFX10-NEXT: s_mov_b32 s2, s4
186 ; GFX10-NEXT: s_mov_b32 s3, s5
187 ; GFX10-NEXT: s_mov_b32 s4, s6
188 ; GFX10-NEXT: s_mov_b32 s5, s7
189 ; GFX10-NEXT: s_mov_b32 s6, s8
190 ; GFX10-NEXT: s_mov_b32 s7, s9
191 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
192 ; GFX10-NEXT: v_mov_b32_e32 v1, v6
193 ; GFX10-NEXT: v_mov_b32_e32 v2, v7
194 ; GFX10-NEXT: v_mov_b32_e32 v3, v8
195 ; GFX10-NEXT: v_mov_b32_e32 v4, v9
196 ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
197 ; GFX10-NEXT: s_waitcnt vmcnt(0)
198 ; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
199 ; GFX10-NEXT: ; return to shader part epilog
201 ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
203 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
204 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
205 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
206 ; GFX11-NEXT: s_mov_b32 s0, s2
207 ; GFX11-NEXT: s_mov_b32 s1, s3
208 ; GFX11-NEXT: v_mov_b32_e32 v6, v5
209 ; GFX11-NEXT: v_mov_b32_e32 v7, v5
210 ; GFX11-NEXT: v_mov_b32_e32 v8, v5
211 ; GFX11-NEXT: v_mov_b32_e32 v9, v5
212 ; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
213 ; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
214 ; GFX11-NEXT: s_mov_b32 s2, s4
215 ; GFX11-NEXT: s_mov_b32 s3, s5
216 ; GFX11-NEXT: s_mov_b32 s4, s6
217 ; GFX11-NEXT: s_mov_b32 s5, s7
218 ; GFX11-NEXT: s_mov_b32 s6, s8
219 ; GFX11-NEXT: s_mov_b32 s7, s9
220 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
221 ; GFX11-NEXT: v_mov_b32_e32 v1, v6
222 ; GFX11-NEXT: v_mov_b32_e32 v2, v7
223 ; GFX11-NEXT: v_mov_b32_e32 v3, v8
224 ; GFX11-NEXT: v_mov_b32_e32 v4, v9
225 ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
226 ; GFX11-NEXT: s_waitcnt vmcnt(0)
227 ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
228 ; GFX11-NEXT: ; return to shader part epilog
; Struct-returning overload; second-to-last immediate is 3 (see `tfe lwe` above).
229 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
; Element 0 is the texel vector that the shader returns.
230 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
; Element 1 is the extra i32 result; it is written to %out below.
231 %v.err = extractvalue { <4 x float>, i32 } %v, 1
232 store i32 %v.err, ptr addrspace(1) %out, align 4
233 ret <4 x float> %v.vec
; Intrinsic overloads exercised by this test. Both take an immediate i32,
; four i16 coordinates, an <8 x i32> descriptor and two trailing immediate
; i32 operands; they differ only in return type.
; Plain overload: returns just the <4 x float> result.
236 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
; Struct overload: returns the <4 x float> result plus an extra i32.
237 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by both declarations above.
239 attributes #0 = { nounwind readonly }