1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
8 ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; Loads a <3 x i32> through an addrspace(3) pointer with no explicit alignment
; on the load. Every checked target is expected to use a single 96-bit DS read
; (ds_read_b96, spelled ds_load_b96 in the gfx1100 checks) followed by a wait
; on lgkmcnt(0). The hawaii checks additionally expect m0 to be set to -1
; before the read.
10 define <3 x i32> @load_lds_v3i32(ptr addrspace(3) %ptr) {
11 ; GFX9-LABEL: load_lds_v3i32:
13 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX9-NEXT: ds_read_b96 v[0:2], v0
15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
16 ; GFX9-NEXT: s_setpc_b64 s[30:31]
18 ; GFX7-LABEL: load_lds_v3i32:
20 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX7-NEXT: s_mov_b32 m0, -1
22 ; GFX7-NEXT: ds_read_b96 v[0:2], v0
23 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
24 ; GFX7-NEXT: s_setpc_b64 s[30:31]
26 ; GFX10-LABEL: load_lds_v3i32:
28 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GFX10-NEXT: ds_read_b96 v[0:2], v0
30 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
31 ; GFX10-NEXT: s_setpc_b64 s[30:31]
33 ; GFX11-LABEL: load_lds_v3i32:
35 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; GFX11-NEXT: ds_load_b96 v[0:2], v0
37 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
38 ; GFX11-NEXT: s_setpc_b64 s[30:31]
39 %load = load <3 x i32>, ptr addrspace(3) %ptr
; Loads a <3 x i32> through an addrspace(3) pointer with align 1. The checks
; expect the value to be assembled from twelve single-byte DS reads covering
; offsets 0 through 11. Each result dword is rebuilt from four bytes using
; shifts and ORs: v_lshl_or_b32 and v_or3_b32 on gfx900, gfx1010 and gfx1100,
; and separate v_lshlrev_b32 / v_or_b32 on hawaii. The hawaii checks also
; expect m0 to be set to -1, and the gfx1100 checks expect one s_delay_alu
; before the final OR. The precise interleaving of reads and s_waitcnt values
; is whatever llc emitted when the assertions were last regenerated.
43 define <3 x i32> @load_lds_v3i32_align1(ptr addrspace(3) %ptr) {
44 ; GFX9-LABEL: load_lds_v3i32_align1:
46 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GFX9-NEXT: ds_read_u8 v1, v0
48 ; GFX9-NEXT: ds_read_u8 v2, v0 offset:1
49 ; GFX9-NEXT: ds_read_u8 v3, v0 offset:2
50 ; GFX9-NEXT: ds_read_u8 v4, v0 offset:3
51 ; GFX9-NEXT: ds_read_u8 v5, v0 offset:4
52 ; GFX9-NEXT: ds_read_u8 v6, v0 offset:5
53 ; GFX9-NEXT: ds_read_u8 v7, v0 offset:6
54 ; GFX9-NEXT: ds_read_u8 v8, v0 offset:7
55 ; GFX9-NEXT: s_waitcnt lgkmcnt(6)
56 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 8, v1
57 ; GFX9-NEXT: s_waitcnt lgkmcnt(4)
58 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v4
59 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
60 ; GFX9-NEXT: v_or3_b32 v3, v2, v3, v1
61 ; GFX9-NEXT: s_waitcnt lgkmcnt(2)
62 ; GFX9-NEXT: v_lshl_or_b32 v1, v6, 8, v5
63 ; GFX9-NEXT: ds_read_u8 v2, v0 offset:8
64 ; GFX9-NEXT: ds_read_u8 v4, v0 offset:9
65 ; GFX9-NEXT: ds_read_u8 v5, v0 offset:10
66 ; GFX9-NEXT: ds_read_u8 v0, v0 offset:11
67 ; GFX9-NEXT: s_waitcnt lgkmcnt(4)
68 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v8
69 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 16, v7
70 ; GFX9-NEXT: s_waitcnt lgkmcnt(2)
71 ; GFX9-NEXT: v_lshl_or_b32 v2, v4, 8, v2
72 ; GFX9-NEXT: s_waitcnt lgkmcnt(1)
73 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v5
74 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
75 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
76 ; GFX9-NEXT: v_or3_b32 v1, v6, v7, v1
77 ; GFX9-NEXT: v_or3_b32 v2, v0, v4, v2
78 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
79 ; GFX9-NEXT: s_setpc_b64 s[30:31]
81 ; GFX7-LABEL: load_lds_v3i32_align1:
83 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; GFX7-NEXT: ds_read_u8 v1, v0 offset:1
85 ; GFX7-NEXT: ds_read_u8 v2, v0
86 ; GFX7-NEXT: ds_read_u8 v3, v0 offset:2
87 ; GFX7-NEXT: s_mov_b32 m0, -1
88 ; GFX7-NEXT: s_waitcnt lgkmcnt(2)
89 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
90 ; GFX7-NEXT: s_waitcnt lgkmcnt(1)
91 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
92 ; GFX7-NEXT: ds_read_u8 v2, v0 offset:3
93 ; GFX7-NEXT: ds_read_u8 v4, v0 offset:4
94 ; GFX7-NEXT: ds_read_u8 v5, v0 offset:5
95 ; GFX7-NEXT: ds_read_u8 v6, v0 offset:6
96 ; GFX7-NEXT: ds_read_u8 v7, v0 offset:7
97 ; GFX7-NEXT: ds_read_u8 v8, v0 offset:8
98 ; GFX7-NEXT: ds_read_u8 v9, v0 offset:9
99 ; GFX7-NEXT: ds_read_u8 v10, v0 offset:10
100 ; GFX7-NEXT: s_waitcnt lgkmcnt(7)
101 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2
102 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
103 ; GFX7-NEXT: ds_read_u8 v0, v0 offset:11
104 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
105 ; GFX7-NEXT: v_or_b32_e32 v3, v2, v1
106 ; GFX7-NEXT: s_waitcnt lgkmcnt(6)
107 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v5
108 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
109 ; GFX7-NEXT: s_waitcnt lgkmcnt(4)
110 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v7
111 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v6
112 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4
113 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1
114 ; GFX7-NEXT: s_waitcnt lgkmcnt(2)
115 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v9
116 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
117 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
118 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v10
119 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v8
120 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
121 ; GFX7-NEXT: v_or_b32_e32 v2, v0, v2
122 ; GFX7-NEXT: v_mov_b32_e32 v0, v3
123 ; GFX7-NEXT: s_setpc_b64 s[30:31]
125 ; GFX10-LABEL: load_lds_v3i32_align1:
127 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; GFX10-NEXT: ds_read_u8 v1, v0
129 ; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
130 ; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
131 ; GFX10-NEXT: ds_read_u8 v4, v0 offset:3
132 ; GFX10-NEXT: ds_read_u8 v5, v0 offset:4
133 ; GFX10-NEXT: ds_read_u8 v6, v0 offset:5
134 ; GFX10-NEXT: ds_read_u8 v7, v0 offset:6
135 ; GFX10-NEXT: ds_read_u8 v8, v0 offset:7
136 ; GFX10-NEXT: ds_read_u8 v9, v0 offset:8
137 ; GFX10-NEXT: ds_read_u8 v10, v0 offset:9
138 ; GFX10-NEXT: ds_read_u8 v11, v0 offset:11
139 ; GFX10-NEXT: ds_read_u8 v0, v0 offset:10
140 ; GFX10-NEXT: s_waitcnt lgkmcnt(10)
141 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 8, v1
142 ; GFX10-NEXT: s_waitcnt lgkmcnt(9)
143 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
144 ; GFX10-NEXT: s_waitcnt lgkmcnt(8)
145 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v4
146 ; GFX10-NEXT: s_waitcnt lgkmcnt(6)
147 ; GFX10-NEXT: v_lshl_or_b32 v4, v6, 8, v5
148 ; GFX10-NEXT: s_waitcnt lgkmcnt(5)
149 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v7
150 ; GFX10-NEXT: s_waitcnt lgkmcnt(4)
151 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v8
152 ; GFX10-NEXT: s_waitcnt lgkmcnt(2)
153 ; GFX10-NEXT: v_lshl_or_b32 v7, v10, 8, v9
154 ; GFX10-NEXT: s_waitcnt lgkmcnt(1)
155 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v11
156 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
157 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v0
158 ; GFX10-NEXT: v_or3_b32 v0, v2, v3, v1
159 ; GFX10-NEXT: v_or3_b32 v1, v5, v6, v4
160 ; GFX10-NEXT: v_or3_b32 v2, v8, v9, v7
161 ; GFX10-NEXT: s_setpc_b64 s[30:31]
163 ; GFX11-LABEL: load_lds_v3i32_align1:
165 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX11-NEXT: ds_load_u8 v1, v0
167 ; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
168 ; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
169 ; GFX11-NEXT: ds_load_u8 v4, v0 offset:3
170 ; GFX11-NEXT: ds_load_u8 v5, v0 offset:4
171 ; GFX11-NEXT: ds_load_u8 v6, v0 offset:5
172 ; GFX11-NEXT: ds_load_u8 v7, v0 offset:6
173 ; GFX11-NEXT: ds_load_u8 v8, v0 offset:7
174 ; GFX11-NEXT: ds_load_u8 v9, v0 offset:8
175 ; GFX11-NEXT: ds_load_u8 v10, v0 offset:9
176 ; GFX11-NEXT: ds_load_u8 v11, v0 offset:11
177 ; GFX11-NEXT: ds_load_u8 v0, v0 offset:10
178 ; GFX11-NEXT: s_waitcnt lgkmcnt(10)
179 ; GFX11-NEXT: v_lshl_or_b32 v1, v2, 8, v1
180 ; GFX11-NEXT: s_waitcnt lgkmcnt(9)
181 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
182 ; GFX11-NEXT: s_waitcnt lgkmcnt(8)
183 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v4
184 ; GFX11-NEXT: s_waitcnt lgkmcnt(6)
185 ; GFX11-NEXT: v_lshl_or_b32 v4, v6, 8, v5
186 ; GFX11-NEXT: s_waitcnt lgkmcnt(5)
187 ; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v7
188 ; GFX11-NEXT: s_waitcnt lgkmcnt(4)
189 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 24, v8
190 ; GFX11-NEXT: s_waitcnt lgkmcnt(2)
191 ; GFX11-NEXT: v_lshl_or_b32 v7, v10, 8, v9
192 ; GFX11-NEXT: s_waitcnt lgkmcnt(1)
193 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, 24, v11
194 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
195 ; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v0
196 ; GFX11-NEXT: v_or3_b32 v0, v2, v3, v1
197 ; GFX11-NEXT: v_or3_b32 v1, v5, v6, v4
198 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
199 ; GFX11-NEXT: v_or3_b32 v2, v8, v9, v7
200 ; GFX11-NEXT: s_setpc_b64 s[30:31]
201 %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1
; Loads a <3 x i32> through an addrspace(3) pointer with align 2. The checks
; expect six 16-bit DS reads at offsets 0, 2, 4, 6, 8 and 10. Each result
; dword is formed from a pair of halves: the higher-offset half is shifted
; left by 16 and ORed with the lower-offset half (one v_lshl_or_b32 on gfx900,
; gfx1010 and gfx1100; v_lshlrev_b32 plus v_or_b32 on hawaii, which is also
; expected to set m0 to -1).
205 define <3 x i32> @load_lds_v3i32_align2(ptr addrspace(3) %ptr) {
206 ; GFX9-LABEL: load_lds_v3i32_align2:
208 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX9-NEXT: ds_read_u16 v1, v0
210 ; GFX9-NEXT: ds_read_u16 v2, v0 offset:2
211 ; GFX9-NEXT: ds_read_u16 v3, v0 offset:4
212 ; GFX9-NEXT: ds_read_u16 v4, v0 offset:6
213 ; GFX9-NEXT: ds_read_u16 v5, v0 offset:8
214 ; GFX9-NEXT: ds_read_u16 v6, v0 offset:10
215 ; GFX9-NEXT: s_waitcnt lgkmcnt(4)
216 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v1
217 ; GFX9-NEXT: s_waitcnt lgkmcnt(2)
218 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v3
219 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
220 ; GFX9-NEXT: v_lshl_or_b32 v2, v6, 16, v5
221 ; GFX9-NEXT: s_setpc_b64 s[30:31]
223 ; GFX7-LABEL: load_lds_v3i32_align2:
225 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226 ; GFX7-NEXT: ds_read_u16 v1, v0
227 ; GFX7-NEXT: s_mov_b32 m0, -1
228 ; GFX7-NEXT: ds_read_u16 v2, v0 offset:2
229 ; GFX7-NEXT: ds_read_u16 v3, v0 offset:4
230 ; GFX7-NEXT: ds_read_u16 v4, v0 offset:6
231 ; GFX7-NEXT: ds_read_u16 v5, v0 offset:8
232 ; GFX7-NEXT: ds_read_u16 v6, v0 offset:10
233 ; GFX7-NEXT: s_waitcnt lgkmcnt(4)
234 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v2
235 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
236 ; GFX7-NEXT: s_waitcnt lgkmcnt(2)
237 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v4
238 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
239 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v6
240 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
241 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
242 ; GFX7-NEXT: s_setpc_b64 s[30:31]
244 ; GFX10-LABEL: load_lds_v3i32_align2:
246 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247 ; GFX10-NEXT: ds_read_u16 v1, v0
248 ; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
249 ; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
250 ; GFX10-NEXT: ds_read_u16 v4, v0 offset:6
251 ; GFX10-NEXT: ds_read_u16 v5, v0 offset:8
252 ; GFX10-NEXT: ds_read_u16 v6, v0 offset:10
253 ; GFX10-NEXT: s_waitcnt lgkmcnt(4)
254 ; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v1
255 ; GFX10-NEXT: s_waitcnt lgkmcnt(2)
256 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v3
257 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
258 ; GFX10-NEXT: v_lshl_or_b32 v2, v6, 16, v5
259 ; GFX10-NEXT: s_setpc_b64 s[30:31]
261 ; GFX11-LABEL: load_lds_v3i32_align2:
263 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX11-NEXT: ds_load_u16 v1, v0
265 ; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
266 ; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
267 ; GFX11-NEXT: ds_load_u16 v4, v0 offset:6
268 ; GFX11-NEXT: ds_load_u16 v5, v0 offset:8
269 ; GFX11-NEXT: ds_load_u16 v6, v0 offset:10
270 ; GFX11-NEXT: s_waitcnt lgkmcnt(4)
271 ; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v1
272 ; GFX11-NEXT: s_waitcnt lgkmcnt(2)
273 ; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v3
274 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
275 ; GFX11-NEXT: v_lshl_or_b32 v2, v6, 16, v5
276 ; GFX11-NEXT: s_setpc_b64 s[30:31]
277 %load = load <3 x i32>, ptr addrspace(3) %ptr, align 2
; Loads a <3 x i32> through an addrspace(3) pointer with align 4. The checks
; expect the load to be split in two: a two-address 32-bit read
; (ds_read2_b32 / ds_load_2addr_b32 with offset1:1) filling v[0:1], and a
; single 32-bit read at offset 8 filling v2. The address is first copied from
; v0 into v2, and the second read uses v2 as its address, since the first
; read's destination range includes v0. The hawaii checks also expect m0 to
; be set to -1.
281 define <3 x i32> @load_lds_v3i32_align4(ptr addrspace(3) %ptr) {
282 ; GFX9-LABEL: load_lds_v3i32_align4:
284 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX9-NEXT: v_mov_b32_e32 v2, v0
286 ; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
287 ; GFX9-NEXT: ds_read_b32 v2, v2 offset:8
288 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
289 ; GFX9-NEXT: s_setpc_b64 s[30:31]
291 ; GFX7-LABEL: load_lds_v3i32_align4:
293 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
295 ; GFX7-NEXT: s_mov_b32 m0, -1
296 ; GFX7-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
297 ; GFX7-NEXT: ds_read_b32 v2, v2 offset:8
298 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
299 ; GFX7-NEXT: s_setpc_b64 s[30:31]
301 ; GFX10-LABEL: load_lds_v3i32_align4:
303 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
305 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
306 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
307 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
308 ; GFX10-NEXT: s_setpc_b64 s[30:31]
310 ; GFX11-LABEL: load_lds_v3i32_align4:
312 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
314 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
315 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
316 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
317 ; GFX11-NEXT: s_setpc_b64 s[30:31]
318 %load = load <3 x i32>, ptr addrspace(3) %ptr, align 4
; Loads a <3 x i32> through an addrspace(3) pointer with align 8. The checks
; expect a two-address 32-bit read (ds_read2_b32 / ds_load_2addr_b32 with
; offset1:1) into v[0:1] plus a single 32-bit read at offset 8 into v2, with
; the address copied from v0 to v2 beforehand for use by the second read.
; No 64-bit or 96-bit DS read is expected at this alignment. The hawaii
; checks also expect m0 to be set to -1.
322 define <3 x i32> @load_lds_v3i32_align8(ptr addrspace(3) %ptr) {
323 ; GFX9-LABEL: load_lds_v3i32_align8:
325 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GFX9-NEXT: v_mov_b32_e32 v2, v0
327 ; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
328 ; GFX9-NEXT: ds_read_b32 v2, v2 offset:8
329 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
330 ; GFX9-NEXT: s_setpc_b64 s[30:31]
332 ; GFX7-LABEL: load_lds_v3i32_align8:
334 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
336 ; GFX7-NEXT: s_mov_b32 m0, -1
337 ; GFX7-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
338 ; GFX7-NEXT: ds_read_b32 v2, v2 offset:8
339 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
340 ; GFX7-NEXT: s_setpc_b64 s[30:31]
342 ; GFX10-LABEL: load_lds_v3i32_align8:
344 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
346 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
347 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
348 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
349 ; GFX10-NEXT: s_setpc_b64 s[30:31]
351 ; GFX11-LABEL: load_lds_v3i32_align8:
353 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
355 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
356 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
357 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
358 ; GFX11-NEXT: s_setpc_b64 s[30:31]
359 %load = load <3 x i32>, ptr addrspace(3) %ptr, align 8
; Loads a <3 x i32> through an addrspace(3) pointer with align 16. Every
; checked target is expected to use a single 96-bit DS read (ds_read_b96,
; spelled ds_load_b96 in the gfx1100 checks) into v[0:2], followed by a wait
; on lgkmcnt(0). The hawaii checks additionally expect m0 to be set to -1
; before the read.
363 define <3 x i32> @load_lds_v3i32_align16(ptr addrspace(3) %ptr) {
364 ; GFX9-LABEL: load_lds_v3i32_align16:
366 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX9-NEXT: ds_read_b96 v[0:2], v0
368 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
369 ; GFX9-NEXT: s_setpc_b64 s[30:31]
371 ; GFX7-LABEL: load_lds_v3i32_align16:
373 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX7-NEXT: s_mov_b32 m0, -1
375 ; GFX7-NEXT: ds_read_b96 v[0:2], v0
376 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
377 ; GFX7-NEXT: s_setpc_b64 s[30:31]
379 ; GFX10-LABEL: load_lds_v3i32_align16:
381 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382 ; GFX10-NEXT: ds_read_b96 v[0:2], v0
383 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
384 ; GFX10-NEXT: s_setpc_b64 s[30:31]
386 ; GFX11-LABEL: load_lds_v3i32_align16:
388 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX11-NEXT: ds_load_b96 v[0:2], v0
390 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
391 ; GFX11-NEXT: s_setpc_b64 s[30:31]
392 %load = load <3 x i32>, ptr addrspace(3) %ptr, align 16