1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
; i8 load from addrspace(4) with 4-byte alignment, stored back to %out as i8.
; On all three run lines the checks expect the data load to be an s_load_dword
; (a scalar dword load, not a byte load); only the store is byte-sized
; (flat_store_byte on the fiji run, global_store_byte on gfx900 and gfx1010).
6 define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
7 ; GFX8-LABEL: constant_load_i8_align4:
9 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
11 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
12 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
13 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
14 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
15 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
16 ; GFX8-NEXT: flat_store_byte v[0:1], v2
19 ; GFX9-LABEL: constant_load_i8_align4:
21 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
22 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
23 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
24 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
25 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
26 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
27 ; GFX9-NEXT: global_store_byte v1, v0, s[0:1]
30 ; GFX10-LABEL: constant_load_i8_align4:
32 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
33 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
34 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
35 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
36 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
37 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
38 ; GFX10-NEXT: global_store_byte v1, v0, s[0:1]
39 ; GFX10-NEXT: s_endpgm
40 %ld = load i8, ptr addrspace(4) %in, align 4
41 store i8 %ld, ptr addrspace(1) %out, align 4
; i16 load from addrspace(4) with 4-byte alignment, stored back to %out as i16.
; The checks expect the data load to be an s_load_dword on every run line,
; with a 16-bit store afterwards (flat_store_short on the fiji run,
; global_store_short on gfx900 and gfx1010).
45 define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
46 ; GFX8-LABEL: constant_load_i16_align4:
48 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
49 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
50 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
51 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
52 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
53 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
54 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
55 ; GFX8-NEXT: flat_store_short v[0:1], v2
58 ; GFX9-LABEL: constant_load_i16_align4:
60 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
61 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
62 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
63 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
64 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
65 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
66 ; GFX9-NEXT: global_store_short v1, v0, s[0:1]
69 ; GFX10-LABEL: constant_load_i16_align4:
71 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
72 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
73 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
74 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
75 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
76 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
77 ; GFX10-NEXT: global_store_short v1, v0, s[0:1]
78 ; GFX10-NEXT: s_endpgm
79 %ld = load i16, ptr addrspace(4) %in, align 4
80 store i16 %ld, ptr addrspace(1) %out, align 4
; i8 load from addrspace(1) with 4-byte alignment, sign-extended to i32 and
; stored as a full dword. The checks expect an s_load_dword for the data,
; then s_sext_i32_i8 applied to the loaded scalar register before the store.
84 define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
85 ; GFX8-LABEL: sextload_i8_to_i32_align4:
87 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
88 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
89 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
90 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
91 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
92 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
93 ; GFX8-NEXT: s_sext_i32_i8 s2, s2
94 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
95 ; GFX8-NEXT: flat_store_dword v[0:1], v2
98 ; GFX9-LABEL: sextload_i8_to_i32_align4:
100 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
101 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
102 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
103 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
104 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
105 ; GFX9-NEXT: s_sext_i32_i8 s2, s2
106 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
107 ; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
108 ; GFX9-NEXT: s_endpgm
110 ; GFX10-LABEL: sextload_i8_to_i32_align4:
112 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
113 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
114 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
116 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
117 ; GFX10-NEXT: s_sext_i32_i8 s2, s2
118 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
119 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
120 ; GFX10-NEXT: s_endpgm
121 %load = load i8, ptr addrspace(1) %in, align 4
122 %sext = sext i8 %load to i32
123 store i32 %sext, ptr addrspace(1) %out, align 4
; i16 load from addrspace(1) with 4-byte alignment, sign-extended to i32 and
; stored as a full dword. The checks expect an s_load_dword for the data,
; then s_sext_i32_i16 applied to the loaded scalar register before the store.
127 define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
128 ; GFX8-LABEL: sextload_i16_to_i32_align4:
130 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
131 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
132 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
133 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
134 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
135 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
136 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
137 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
138 ; GFX8-NEXT: flat_store_dword v[0:1], v2
139 ; GFX8-NEXT: s_endpgm
141 ; GFX9-LABEL: sextload_i16_to_i32_align4:
143 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
144 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
145 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
146 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
147 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
148 ; GFX9-NEXT: s_sext_i32_i16 s2, s2
149 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
150 ; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
151 ; GFX9-NEXT: s_endpgm
153 ; GFX10-LABEL: sextload_i16_to_i32_align4:
155 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
156 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
157 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
158 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
159 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
160 ; GFX10-NEXT: s_sext_i32_i16 s2, s2
161 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
162 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
163 ; GFX10-NEXT: s_endpgm
164 %load = load i16, ptr addrspace(1) %in, align 4
165 %sext = sext i16 %load to i32
166 store i32 %sext, ptr addrspace(1) %out, align 4
; i8 load from addrspace(1) with 4-byte alignment, zero-extended to i32 and
; stored as a full dword. The checks expect an s_load_dword for the data,
; then s_and_b32 with 0xff to clear everything above the low byte.
170 define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
171 ; GFX8-LABEL: zextload_i8_to_i32_align4:
173 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
174 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
175 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
176 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
177 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
178 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
179 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff
180 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
181 ; GFX8-NEXT: flat_store_dword v[0:1], v2
182 ; GFX8-NEXT: s_endpgm
184 ; GFX9-LABEL: zextload_i8_to_i32_align4:
186 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
187 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
188 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
189 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
190 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
191 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff
192 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
193 ; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
194 ; GFX9-NEXT: s_endpgm
196 ; GFX10-LABEL: zextload_i8_to_i32_align4:
198 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
199 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
201 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
202 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
203 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
204 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
205 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
206 ; GFX10-NEXT: s_endpgm
207 %load = load i8, ptr addrspace(1) %in, align 4
208 %zext = zext i8 %load to i32
209 store i32 %zext, ptr addrspace(1) %out, align 4
; i16 load from addrspace(1) with 4-byte alignment, zero-extended to i32 and
; stored as a full dword. The checks expect an s_load_dword for the data,
; then s_and_b32 with 0xffff to clear the upper 16 bits.
213 define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
214 ; GFX8-LABEL: zextload_i16_to_i32_align4:
216 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
217 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
218 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
219 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
220 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
221 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
222 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
223 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
224 ; GFX8-NEXT: flat_store_dword v[0:1], v2
225 ; GFX8-NEXT: s_endpgm
227 ; GFX9-LABEL: zextload_i16_to_i32_align4:
229 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
230 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
231 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
232 ; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
233 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
234 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
235 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
236 ; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
237 ; GFX9-NEXT: s_endpgm
239 ; GFX10-LABEL: zextload_i16_to_i32_align4:
241 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
242 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
243 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
244 ; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
245 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
246 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
247 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
248 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
249 ; GFX10-NEXT: s_endpgm
250 %load = load i16, ptr addrspace(1) %in, align 4
251 %zext = zext i16 %load to i32
252 store i32 %zext, ptr addrspace(1) %out, align 4
; i8 load with only 2-byte alignment, stored back to %out as i8.
; Unlike the align-4 tests in this file, the checks expect a byte-sized vector
; memory load for the data (flat_load_ubyte on the fiji run, global_load_ubyte
; on gfx900 and gfx1010) and no s_load_dword of the pointee.
; NOTE(review): despite the "constant_" prefix, %in is addrspace(1) here, not
; addrspace(4) as in the constant_load_*_align4 tests; confirm this is intended.
256 define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
257 ; GFX8-LABEL: constant_load_i8_align2:
259 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
260 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
261 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
262 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
263 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
264 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
265 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
266 ; GFX8-NEXT: s_waitcnt vmcnt(0)
267 ; GFX8-NEXT: flat_store_byte v[0:1], v2
268 ; GFX8-NEXT: s_endpgm
270 ; GFX9-LABEL: constant_load_i8_align2:
272 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
273 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
274 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
275 ; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3]
276 ; GFX9-NEXT: s_waitcnt vmcnt(0)
277 ; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
278 ; GFX9-NEXT: s_endpgm
280 ; GFX10-LABEL: constant_load_i8_align2:
282 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
283 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
284 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
285 ; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3]
286 ; GFX10-NEXT: s_waitcnt vmcnt(0)
287 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
288 ; GFX10-NEXT: s_endpgm
289 %load = load i8, ptr addrspace(1) %in, align 2
290 store i8 %load, ptr addrspace(1) %out, align 2
; i16 load with 2-byte alignment, stored back to %out as i16.
; The checks expect a 16-bit vector memory load for the data (flat_load_ushort
; on the fiji run, global_load_ushort on gfx900 and gfx1010) and no
; s_load_dword of the pointee, in contrast to the align-4 tests in this file.
; NOTE(review): despite the "constant_" prefix, %in is addrspace(1) here, not
; addrspace(4) as in the constant_load_*_align4 tests; confirm this is intended.
294 define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
295 ; GFX8-LABEL: constant_load_i16_align2:
297 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
298 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
299 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
300 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
301 ; GFX8-NEXT: flat_load_ushort v2, v[0:1]
302 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
303 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
304 ; GFX8-NEXT: s_waitcnt vmcnt(0)
305 ; GFX8-NEXT: flat_store_short v[0:1], v2
306 ; GFX8-NEXT: s_endpgm
308 ; GFX9-LABEL: constant_load_i16_align2:
310 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
311 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
312 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
313 ; GFX9-NEXT: global_load_ushort v1, v0, s[2:3]
314 ; GFX9-NEXT: s_waitcnt vmcnt(0)
315 ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
316 ; GFX9-NEXT: s_endpgm
318 ; GFX10-LABEL: constant_load_i16_align2:
320 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
321 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
322 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
323 ; GFX10-NEXT: global_load_ushort v1, v0, s[2:3]
324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
325 ; GFX10-NEXT: global_store_short v0, v1, s[0:1]
326 ; GFX10-NEXT: s_endpgm
327 %load = load i16, ptr addrspace(1) %in, align 2
328 store i16 %load, ptr addrspace(1) %out, align 2
; i8 load (align 2) sign-extended to i32, with the i32 store also only
; 2-byte aligned. The checks expect a signed byte load (flat_load_sbyte on the
; fiji run, global_load_sbyte on gfx900 and gfx1010) and the i32 store split
; into two 16-bit stores: the low half at %out and the high half at %out + 2.
; The fiji run forms the high half with v_lshrrev_b32 by 16 and computes the
; second address with s_add_u32/s_addc_u32; the gfx900 and gfx1010 runs use
; global_store_short_d16_hi with offset:2.
; NOTE(review): despite the "constant_" prefix, %in is addrspace(1) here, not
; addrspace(4) as in the constant_load_*_align4 tests; confirm this is intended.
332 define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
333 ; GFX8-LABEL: constant_sextload_i8_align2:
335 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
336 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
337 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
338 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
339 ; GFX8-NEXT: flat_load_sbyte v2, v[0:1]
340 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
341 ; GFX8-NEXT: s_add_u32 s2, s0, 2
342 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
343 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
344 ; GFX8-NEXT: s_waitcnt vmcnt(0)
345 ; GFX8-NEXT: flat_store_short v[0:1], v2
346 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
347 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2
348 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
349 ; GFX8-NEXT: flat_store_short v[0:1], v3
350 ; GFX8-NEXT: s_endpgm
352 ; GFX9-LABEL: constant_sextload_i8_align2:
354 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
355 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
356 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
357 ; GFX9-NEXT: global_load_sbyte v1, v0, s[2:3]
358 ; GFX9-NEXT: s_waitcnt vmcnt(0)
359 ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
360 ; GFX9-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2
361 ; GFX9-NEXT: s_endpgm
363 ; GFX10-LABEL: constant_sextload_i8_align2:
365 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
366 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
367 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
368 ; GFX10-NEXT: global_load_sbyte v1, v0, s[2:3]
369 ; GFX10-NEXT: s_waitcnt vmcnt(0)
370 ; GFX10-NEXT: global_store_short v0, v1, s[0:1]
371 ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2
372 ; GFX10-NEXT: s_endpgm
373 %load = load i8, ptr addrspace(1) %in, align 2
374 %sextload = sext i8 %load to i32
375 store i32 %sextload, ptr addrspace(1) %out, align 2
; i8 load (align 2) zero-extended to i32, with the i32 store also only
; 2-byte aligned. The checks expect an unsigned byte load (flat_load_ubyte on
; the fiji run, global_load_ubyte on gfx900 and gfx1010) with no separate
; masking instruction, and the i32 store split into two 16-bit stores: the low
; half at %out and the high half at %out + 2 (v_lshrrev_b32 by 16 plus an
; s_add_u32/s_addc_u32 address on the fiji run; global_store_short_d16_hi with
; offset:2 on the gfx900 and gfx1010 runs).
; NOTE(review): despite the "constant_" prefix, %in is addrspace(1) here, not
; addrspace(4) as in the constant_load_*_align4 tests; confirm this is intended.
379 define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
380 ; GFX8-LABEL: constant_zextload_i8_align2:
382 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
383 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
384 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
385 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
386 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
387 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
388 ; GFX8-NEXT: s_add_u32 s2, s0, 2
389 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
390 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
391 ; GFX8-NEXT: s_waitcnt vmcnt(0)
392 ; GFX8-NEXT: flat_store_short v[0:1], v2
393 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
394 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2
395 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
396 ; GFX8-NEXT: flat_store_short v[0:1], v3
397 ; GFX8-NEXT: s_endpgm
399 ; GFX9-LABEL: constant_zextload_i8_align2:
401 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
402 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
403 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3]
405 ; GFX9-NEXT: s_waitcnt vmcnt(0)
406 ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
407 ; GFX9-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2
408 ; GFX9-NEXT: s_endpgm
410 ; GFX10-LABEL: constant_zextload_i8_align2:
412 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
413 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
414 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
415 ; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3]
416 ; GFX10-NEXT: s_waitcnt vmcnt(0)
417 ; GFX10-NEXT: global_store_short v0, v1, s[0:1]
418 ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2
419 ; GFX10-NEXT: s_endpgm
420 %load = load i8, ptr addrspace(1) %in, align 2
421 %zextload = zext i8 %load to i32
422 store i32 %zextload, ptr addrspace(1) %out, align 2
; Attribute group #0 (nounwind), referenced by the kernel definitions above.
426 attributes #0 = { nounwind }