1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-scratch-access < %s | FileCheck --check-prefix=GFX7-ALIGNED %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-scratch-access < %s | FileCheck --check-prefix=GFX7-UNALIGNED %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-scratch-access < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-scratch-access -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX9-FLASTSCR %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+unaligned-scratch-access < %s | FileCheck --check-prefix=GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+unaligned-scratch-access -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX10-FLASTSCR %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+unaligned-scratch-access < %s | FileCheck --check-prefix=GFX11 %s
9 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+unaligned-scratch-access -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX11-FLASTSCR %s
11 ; Two adjacent i16 loads that are each only align 2. Should not merge this to a dword load when unaligned scratch access is disabled (two ushort loads are expected there); every run that enables unaligned scratch access expects a single dword load.
12 define i32 @private_load_2xi16_align2(ptr addrspace(5) %p) #0 {
13 ; GFX7-ALIGNED-LABEL: private_load_2xi16_align2:
14 ; GFX7-ALIGNED: ; %bb.0:
15 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v1, vcc, 2, v0
17 ; GFX7-ALIGNED-NEXT: buffer_load_ushort v1, v1, s[0:3], 0 offen
18 ; GFX7-ALIGNED-NEXT: buffer_load_ushort v0, v0, s[0:3], 0 offen
19 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(1)
20 ; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
21 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
22 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1
23 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
25 ; GFX7-UNALIGNED-LABEL: private_load_2xi16_align2:
26 ; GFX7-UNALIGNED: ; %bb.0:
27 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; GFX7-UNALIGNED-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
29 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
30 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
32 ; GFX9-LABEL: private_load_2xi16_align2:
34 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
36 ; GFX9-NEXT: s_waitcnt vmcnt(0)
37 ; GFX9-NEXT: s_setpc_b64 s[30:31]
39 ; GFX9-FLASTSCR-LABEL: private_load_2xi16_align2:
40 ; GFX9-FLASTSCR: ; %bb.0:
41 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX9-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
43 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
44 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
46 ; GFX10-LABEL: private_load_2xi16_align2:
48 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
50 ; GFX10-NEXT: s_waitcnt vmcnt(0)
51 ; GFX10-NEXT: s_setpc_b64 s[30:31]
53 ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align2:
54 ; GFX10-FLASTSCR: ; %bb.0:
55 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
57 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
58 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
60 ; GFX11-LABEL: private_load_2xi16_align2:
62 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX11-NEXT: scratch_load_b32 v0, v0, off
64 ; GFX11-NEXT: s_waitcnt vmcnt(0)
65 ; GFX11-NEXT: s_setpc_b64 s[30:31]
67 ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align2:
68 ; GFX11-FLASTSCR: ; %bb.0:
69 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
71 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
72 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
73 %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1 ; address of the second i16, one element past %p
74 %p.0 = load i16, ptr addrspace(5) %p, align 2 ; first element, only 2-byte aligned
75 %p.1 = load i16, ptr addrspace(5) %gep.p, align 2 ; second element, only 2-byte aligned
76 %zext.0 = zext i16 %p.0 to i32 ; first element becomes the low 16 bits of the result
77 %zext.1 = zext i16 %p.1 to i32 ; widen the second element before shifting
78 %shl.1 = shl i32 %zext.1, 16 ; second element becomes the high 16 bits of the result
79 %or = or i32 %zext.0, %shl.1 ; recombine both halves into the i32 a single dword load would produce
83 ; Two adjacent i16 stores that are each only align 2. Should not merge this to a dword store when unaligned scratch access is disabled (two short stores are expected there); every run that enables unaligned scratch access expects a single dword store of 0x20001.
84 define void @private_store_2xi16_align2(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
85 ; GFX7-ALIGNED-LABEL: private_store_2xi16_align2:
86 ; GFX7-ALIGNED: ; %bb.0:
87 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, 1
89 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, 2
90 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 2, v1
91 ; GFX7-ALIGNED-NEXT: buffer_store_short v3, v1, s[0:3], 0 offen
92 ; GFX7-ALIGNED-NEXT: buffer_store_short v0, v2, s[0:3], 0 offen
93 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
94 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
96 ; GFX7-UNALIGNED-LABEL: private_store_2xi16_align2:
97 ; GFX7-UNALIGNED: ; %bb.0:
98 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX7-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x20001
100 ; GFX7-UNALIGNED-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
101 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
102 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
104 ; GFX9-LABEL: private_store_2xi16_align2:
106 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
108 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
109 ; GFX9-NEXT: s_waitcnt vmcnt(0)
110 ; GFX9-NEXT: s_setpc_b64 s[30:31]
112 ; GFX9-FLASTSCR-LABEL: private_store_2xi16_align2:
113 ; GFX9-FLASTSCR: ; %bb.0:
114 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; GFX9-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
116 ; GFX9-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
117 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
118 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
120 ; GFX10-LABEL: private_store_2xi16_align2:
122 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
124 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
125 ; GFX10-NEXT: s_setpc_b64 s[30:31]
127 ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align2:
128 ; GFX10-FLASTSCR: ; %bb.0:
129 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
131 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
132 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
134 ; GFX11-LABEL: private_store_2xi16_align2:
136 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
138 ; GFX11-NEXT: scratch_store_b32 v1, v0, off
139 ; GFX11-NEXT: s_setpc_b64 s[30:31]
141 ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align2:
142 ; GFX11-FLASTSCR: ; %bb.0:
143 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
145 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
146 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
147 %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 ; address of the second i16, one element past %r
148 store i16 1, ptr addrspace(5) %r, align 2 ; first element, only 2-byte aligned
149 store i16 2, ptr addrspace(5) %gep.r, align 2 ; second element; merged with the first this is the 0x20001 constant, (2 << 16) | 1
153 ; Two adjacent i16 loads that are each only align 1. Should produce an align 1 dword load when unaligned scratch access is legal; with unaligned scratch access disabled the access is expected to be split into four byte loads.
154 define i32 @private_load_2xi16_align1(ptr addrspace(5) %p) #0 {
155 ; GFX7-ALIGNED-LABEL: private_load_2xi16_align1:
156 ; GFX7-ALIGNED: ; %bb.0:
157 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v1, vcc, 2, v0
159 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 1, v0
160 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v3, vcc, 3, v0
161 ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v3, v3, s[0:3], 0 offen
162 ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v2, v2, s[0:3], 0 offen
163 ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen
164 ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v0, v0, s[0:3], 0 offen
165 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(3)
166 ; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v3
167 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(2)
168 ; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 8, v2
169 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(1)
170 ; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
171 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
172 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v2, v0
173 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v3, v1
174 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1
175 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
177 ; GFX7-UNALIGNED-LABEL: private_load_2xi16_align1:
178 ; GFX7-UNALIGNED: ; %bb.0:
179 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX7-UNALIGNED-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
181 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
182 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
184 ; GFX9-LABEL: private_load_2xi16_align1:
186 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
188 ; GFX9-NEXT: s_waitcnt vmcnt(0)
189 ; GFX9-NEXT: s_setpc_b64 s[30:31]
191 ; GFX9-FLASTSCR-LABEL: private_load_2xi16_align1:
192 ; GFX9-FLASTSCR: ; %bb.0:
193 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX9-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
195 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
196 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
198 ; GFX10-LABEL: private_load_2xi16_align1:
200 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
202 ; GFX10-NEXT: s_waitcnt vmcnt(0)
203 ; GFX10-NEXT: s_setpc_b64 s[30:31]
205 ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align1:
206 ; GFX10-FLASTSCR: ; %bb.0:
207 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
209 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
210 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
212 ; GFX11-LABEL: private_load_2xi16_align1:
214 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GFX11-NEXT: scratch_load_b32 v0, v0, off
216 ; GFX11-NEXT: s_waitcnt vmcnt(0)
217 ; GFX11-NEXT: s_setpc_b64 s[30:31]
219 ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align1:
220 ; GFX11-FLASTSCR: ; %bb.0:
221 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
223 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
224 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
225 %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1 ; address of the second i16, one element past %p
226 %p.0 = load i16, ptr addrspace(5) %p, align 1 ; first element, byte-aligned only
227 %p.1 = load i16, ptr addrspace(5) %gep.p, align 1 ; second element, byte-aligned only
228 %zext.0 = zext i16 %p.0 to i32 ; first element becomes the low 16 bits of the result
229 %zext.1 = zext i16 %p.1 to i32 ; widen the second element before shifting
230 %shl.1 = shl i32 %zext.1, 16 ; second element becomes the high 16 bits of the result
231 %or = or i32 %zext.0, %shl.1 ; recombine both halves into the i32 a single dword load would produce
235 ; Two adjacent i16 stores that are each only align 1. Should produce an align 1 dword store when unaligned scratch access is legal; with unaligned scratch access disabled the access is expected to be split into four byte stores.
236 define void @private_store_2xi16_align1(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
237 ; GFX7-ALIGNED-LABEL: private_store_2xi16_align1:
238 ; GFX7-ALIGNED: ; %bb.0:
239 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, 1
241 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 2, v1
242 ; GFX7-ALIGNED-NEXT: buffer_store_byte v3, v1, s[0:3], 0 offen
243 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v3, vcc, 1, v1
244 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v4, 0
245 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v1, vcc, 3, v1
246 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, 2
247 ; GFX7-ALIGNED-NEXT: buffer_store_byte v4, v3, s[0:3], 0 offen
248 ; GFX7-ALIGNED-NEXT: buffer_store_byte v4, v1, s[0:3], 0 offen
249 ; GFX7-ALIGNED-NEXT: buffer_store_byte v0, v2, s[0:3], 0 offen
250 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
251 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
253 ; GFX7-UNALIGNED-LABEL: private_store_2xi16_align1:
254 ; GFX7-UNALIGNED: ; %bb.0:
255 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX7-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x20001
257 ; GFX7-UNALIGNED-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
258 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
259 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
261 ; GFX9-LABEL: private_store_2xi16_align1:
263 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
265 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
266 ; GFX9-NEXT: s_waitcnt vmcnt(0)
267 ; GFX9-NEXT: s_setpc_b64 s[30:31]
269 ; GFX9-FLASTSCR-LABEL: private_store_2xi16_align1:
270 ; GFX9-FLASTSCR: ; %bb.0:
271 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272 ; GFX9-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
273 ; GFX9-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
274 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
275 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
277 ; GFX10-LABEL: private_store_2xi16_align1:
279 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
281 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
282 ; GFX10-NEXT: s_setpc_b64 s[30:31]
284 ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align1:
285 ; GFX10-FLASTSCR: ; %bb.0:
286 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
288 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
289 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
291 ; GFX11-LABEL: private_store_2xi16_align1:
293 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
295 ; GFX11-NEXT: scratch_store_b32 v1, v0, off
296 ; GFX11-NEXT: s_setpc_b64 s[30:31]
298 ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align1:
299 ; GFX11-FLASTSCR: ; %bb.0:
300 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
302 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
303 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
304 %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 ; address of the second i16, one element past %r
305 store i16 1, ptr addrspace(5) %r, align 1 ; first element, byte-aligned only
306 store i16 2, ptr addrspace(5) %gep.r, align 1 ; second element; merged with the first this is the 0x20001 constant, (2 << 16) | 1
310 ; Two adjacent i16 loads where the first one is align 4. Should merge this to a dword load on every run, whether or not unaligned scratch access is enabled.
311 define i32 @private_load_2xi16_align4(ptr addrspace(5) %p) #0 {
312 ; GFX7-ALIGNED-LABEL: private_load_2xi16_align4:
313 ; GFX7-ALIGNED: ; %bb.0:
314 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315 ; GFX7-ALIGNED-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
316 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
317 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
319 ; GFX7-UNALIGNED-LABEL: private_load_2xi16_align4:
320 ; GFX7-UNALIGNED: ; %bb.0:
321 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX7-UNALIGNED-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
323 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
324 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
326 ; GFX9-LABEL: private_load_2xi16_align4:
328 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
330 ; GFX9-NEXT: s_waitcnt vmcnt(0)
331 ; GFX9-NEXT: s_setpc_b64 s[30:31]
333 ; GFX9-FLASTSCR-LABEL: private_load_2xi16_align4:
334 ; GFX9-FLASTSCR: ; %bb.0:
335 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GFX9-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
337 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
338 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
340 ; GFX10-LABEL: private_load_2xi16_align4:
342 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
344 ; GFX10-NEXT: s_waitcnt vmcnt(0)
345 ; GFX10-NEXT: s_setpc_b64 s[30:31]
347 ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align4:
348 ; GFX10-FLASTSCR: ; %bb.0:
349 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
351 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
352 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
354 ; GFX11-LABEL: private_load_2xi16_align4:
356 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX11-NEXT: scratch_load_b32 v0, v0, off
358 ; GFX11-NEXT: s_waitcnt vmcnt(0)
359 ; GFX11-NEXT: s_setpc_b64 s[30:31]
361 ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align4:
362 ; GFX11-FLASTSCR: ; %bb.0:
363 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
365 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
366 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
367 %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1 ; address of the second i16, one element past %p
368 %p.0 = load i16, ptr addrspace(5) %p, align 4 ; first element carries the dword alignment
369 %p.1 = load i16, ptr addrspace(5) %gep.p, align 2 ; second element is only 2-byte aligned
370 %zext.0 = zext i16 %p.0 to i32 ; first element becomes the low 16 bits of the result
371 %zext.1 = zext i16 %p.1 to i32 ; widen the second element before shifting
372 %shl.1 = shl i32 %zext.1, 16 ; second element becomes the high 16 bits of the result
373 %or = or i32 %zext.0, %shl.1 ; recombine both halves into the i32 a single dword load would produce
377 ; Two adjacent i16 stores where the first one is align 4. Should merge this to a dword store of 0x20001 on every run, whether or not unaligned scratch access is enabled.
378 define void @private_store_2xi16_align4(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
379 ; GFX7-ALIGNED-LABEL: private_store_2xi16_align4:
380 ; GFX7-ALIGNED: ; %bb.0:
381 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, 0x20001
383 ; GFX7-ALIGNED-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
384 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
385 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
387 ; GFX7-UNALIGNED-LABEL: private_store_2xi16_align4:
388 ; GFX7-UNALIGNED: ; %bb.0:
389 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX7-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x20001
391 ; GFX7-UNALIGNED-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
392 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
393 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
395 ; GFX9-LABEL: private_store_2xi16_align4:
397 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
399 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
400 ; GFX9-NEXT: s_waitcnt vmcnt(0)
401 ; GFX9-NEXT: s_setpc_b64 s[30:31]
403 ; GFX9-FLASTSCR-LABEL: private_store_2xi16_align4:
404 ; GFX9-FLASTSCR: ; %bb.0:
405 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GFX9-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
407 ; GFX9-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
408 ; GFX9-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
409 ; GFX9-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
411 ; GFX10-LABEL: private_store_2xi16_align4:
413 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
415 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
416 ; GFX10-NEXT: s_setpc_b64 s[30:31]
418 ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align4:
419 ; GFX10-FLASTSCR: ; %bb.0:
420 ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
422 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
423 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
425 ; GFX11-LABEL: private_store_2xi16_align4:
427 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
429 ; GFX11-NEXT: scratch_store_b32 v1, v0, off
430 ; GFX11-NEXT: s_setpc_b64 s[30:31]
432 ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align4:
433 ; GFX11-FLASTSCR: ; %bb.0:
434 ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
436 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
437 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
438 %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 ; address of the second i16, one element past %r
439 store i16 1, ptr addrspace(5) %r, align 4 ; first element carries the dword alignment
440 store i16 2, ptr addrspace(5) %gep.r, align 2 ; second element; merged with the first this is the 0x20001 constant, (2 << 16) | 1