1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DAG %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GISEL %s
; Loads an i8 through the inreg addrspace(4) pointer %in, sign-extends it to
; i32 and stores the result to %out. The assertions expect a single s_load_i8
; with an immediate offset of 0x0.
5 define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
6 ; GCN-LABEL: test_s_load_i8:
8 ; GCN-NEXT: s_load_i8 s0, s[0:1], 0x0
9 ; GCN-NEXT: s_wait_kmcnt 0x0
10 ; GCN-NEXT: v_mov_b32_e32 v2, s0
11 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
13 %ld = load i8, ptr addrspace(4) %in
14 %sext = sext i8 %ld to i32
15 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i8 load from %in advanced by a constant -100 bytes.
; For both instruction selectors the assertions expect the negative offset to
; be added to the base pointer with scalar adds (low half 0xff9c / 0xffffff9c,
; high half -1) and the s_load_i8 itself to use an immediate offset of 0x0.
19 define amdgpu_ps void @test_s_load_i8_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
20 ; DAG-LABEL: test_s_load_i8_imm:
22 ; DAG-NEXT: s_movk_i32 s2, 0xff9c
23 ; DAG-NEXT: s_mov_b32 s3, -1
24 ; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
25 ; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
26 ; DAG-NEXT: s_load_i8 s0, s[0:1], 0x0
27 ; DAG-NEXT: s_wait_kmcnt 0x0
28 ; DAG-NEXT: v_mov_b32_e32 v2, s0
29 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
32 ; GISEL-LABEL: test_s_load_i8_imm:
34 ; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff9c
35 ; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
36 ; GISEL-NEXT: s_load_i8 s0, s[0:1], 0x0
37 ; GISEL-NEXT: s_wait_kmcnt 0x0
38 ; GISEL-NEXT: v_mov_b32_e32 v2, s0
39 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
40 ; GISEL-NEXT: s_endpgm
41 %gep = getelementptr i8, ptr addrspace(4) %in, i64 -100
42 %ld = load i8, ptr addrspace(4) %gep
43 %sext = sext i8 %ld to i32
44 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i8 load whose byte offset is the inreg i32 %offset,
; zero-extended to i64 and added to %in. The assertions expect s_load_i8 to
; take s2 as its offset register with an immediate of 0x0.
48 define amdgpu_ps void @test_s_load_i8_sgpr(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
49 ; GCN-LABEL: test_s_load_i8_sgpr:
51 ; GCN-NEXT: s_load_i8 s0, s[0:1], s2 offset:0x0
52 ; GCN-NEXT: s_wait_kmcnt 0x0
53 ; GCN-NEXT: v_mov_b32_e32 v2, s0
54 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
56 %zext = zext i32 %offset to i64
57 %gep = getelementptr i8, ptr addrspace(4) %in, i64 %zext
58 %ld = load i8, ptr addrspace(4) %gep
59 %sext = sext i8 %ld to i32
60 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i8 load from %in + 16 bytes + zext(%offset), with %offset
; passed inreg. The assertions expect both parts of the address to fold into
; one s_load_i8: s2 as the offset register plus an immediate of 0x10.
64 define amdgpu_ps void @test_s_load_i8_sgpr_imm(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
65 ; GCN-LABEL: test_s_load_i8_sgpr_imm:
67 ; GCN-NEXT: s_load_i8 s0, s[0:1], s2 offset:0x10
68 ; GCN-NEXT: s_wait_kmcnt 0x0
69 ; GCN-NEXT: v_mov_b32_e32 v2, s0
70 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
72 %gep1 = getelementptr i8, ptr addrspace(4) %in, i64 16
73 %zext = zext i32 %offset to i64
74 %gep2 = getelementptr i8, ptr addrspace(4) %gep1, i64 %zext
75 %ld = load i8, ptr addrspace(4) %gep2
76 %sext = sext i8 %ld to i32
77 store i32 %sext, ptr addrspace(1) %out
; Same address computation as test_s_load_i8_sgpr_imm, but %offset is not
; marked inreg. The assertions expect no scalar load here: a global_load_i8
; with v0 as the offset, s[0:1] as the base and offset:16 is emitted instead.
81 define amdgpu_ps void @test_s_load_i8_divergent(ptr addrspace(4) inreg %in, i32 %offset, ptr addrspace(1) %out) {
82 ; GCN-LABEL: test_s_load_i8_divergent:
84 ; GCN-NEXT: global_load_i8 v0, v0, s[0:1] offset:16
85 ; GCN-NEXT: s_wait_loadcnt 0x0
86 ; GCN-NEXT: global_store_b32 v[1:2], v0, off
88 %gep1 = getelementptr i8, ptr addrspace(4) %in, i64 16
89 %zext = zext i32 %offset to i64
90 %gep2 = getelementptr i8, ptr addrspace(4) %gep1, i64 %zext
91 %ld = load i8, ptr addrspace(4) %gep2
92 %sext = sext i8 %ld to i32
93 store i32 %sext, ptr addrspace(1) %out
; Loads an i8 through the inreg addrspace(4) pointer %in, zero-extends it to
; i32 and stores the result to %out. The assertions expect a single s_load_u8
; with an immediate offset of 0x0.
97 define amdgpu_ps void @test_s_load_u8(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
98 ; GCN-LABEL: test_s_load_u8:
100 ; GCN-NEXT: s_load_u8 s0, s[0:1], 0x0
101 ; GCN-NEXT: s_wait_kmcnt 0x0
102 ; GCN-NEXT: v_mov_b32_e32 v2, s0
103 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
105 %ld = load i8, ptr addrspace(4) %in
106 %zext = zext i8 %ld to i32
107 store i32 %zext, ptr addrspace(1) %out
; Zero-extending i8 load from %in advanced by a constant +255 bytes.
; The assertions expect the offset to be encoded directly in the load as the
; immediate 0xff.
111 define amdgpu_ps void @test_s_load_u8_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
112 ; GCN-LABEL: test_s_load_u8_imm:
114 ; GCN-NEXT: s_load_u8 s0, s[0:1], 0xff
115 ; GCN-NEXT: s_wait_kmcnt 0x0
116 ; GCN-NEXT: v_mov_b32_e32 v2, s0
117 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
119 %gep = getelementptr i8, ptr addrspace(4) %in, i64 255
120 %ld = load i8, ptr addrspace(4) %gep
121 %zext = zext i8 %ld to i32
122 store i32 %zext, ptr addrspace(1) %out
; Zero-extending i8 load whose byte offset is the inreg i32 %offset,
; zero-extended to i64 and added to %in. The assertions expect s_load_u8 to
; take s2 as its offset register with an immediate of 0x0.
126 define amdgpu_ps void @test_s_load_u8_sgpr(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
127 ; GCN-LABEL: test_s_load_u8_sgpr:
129 ; GCN-NEXT: s_load_u8 s0, s[0:1], s2 offset:0x0
130 ; GCN-NEXT: s_wait_kmcnt 0x0
131 ; GCN-NEXT: v_mov_b32_e32 v2, s0
132 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
134 %zext1 = zext i32 %offset to i64
135 %gep = getelementptr i8, ptr addrspace(4) %in, i64 %zext1
136 %ld = load i8, ptr addrspace(4) %gep
137 %zext2 = zext i8 %ld to i32
138 store i32 %zext2, ptr addrspace(1) %out
; Zero-extending i8 load from %in + 16 bytes + zext(%offset), with %offset
; passed inreg. The assertions expect one s_load_u8 using s2 as the offset
; register plus an immediate of 0x10.
142 define amdgpu_ps void @test_s_load_u8_sgpr_imm(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
143 ; GCN-LABEL: test_s_load_u8_sgpr_imm:
145 ; GCN-NEXT: s_load_u8 s0, s[0:1], s2 offset:0x10
146 ; GCN-NEXT: s_wait_kmcnt 0x0
147 ; GCN-NEXT: v_mov_b32_e32 v2, s0
148 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
150 %gep1 = getelementptr i8, ptr addrspace(4) %in, i64 16
151 %zext1= zext i32 %offset to i64
152 %gep2 = getelementptr i8, ptr addrspace(4) %gep1, i64 %zext1
153 %ld = load i8, ptr addrspace(4) %gep2
154 %zext2= zext i8 %ld to i32
155 store i32 %zext2, ptr addrspace(1) %out
; Same address computation as test_s_load_u8_sgpr_imm, but %offset is not
; marked inreg. The assertions expect a global_load_u8 with v0 as the offset,
; s[0:1] as the base and offset:16 rather than a scalar load.
159 define amdgpu_ps void @test_s_load_u8_divergent(ptr addrspace(4) inreg %in, i32 %offset, ptr addrspace(1) %out) {
160 ; GCN-LABEL: test_s_load_u8_divergent:
162 ; GCN-NEXT: global_load_u8 v0, v0, s[0:1] offset:16
163 ; GCN-NEXT: s_wait_loadcnt 0x0
164 ; GCN-NEXT: global_store_b32 v[1:2], v0, off
166 %gep1 = getelementptr i8, ptr addrspace(4) %in, i64 16
167 %zext1= zext i32 %offset to i64
168 %gep2 = getelementptr i8, ptr addrspace(4) %gep1, i64 %zext1
169 %ld = load i8, ptr addrspace(4) %gep2
170 %zext2= zext i8 %ld to i32
171 store i32 %zext2, ptr addrspace(1) %out
; Loads an i16 through the inreg addrspace(4) pointer %in, sign-extends it to
; i32 and stores the result to %out. The assertions expect a single s_load_i16
; with an immediate offset of 0x0.
175 define amdgpu_ps void @test_s_load_i16(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
176 ; GCN-LABEL: test_s_load_i16:
178 ; GCN-NEXT: s_load_i16 s0, s[0:1], 0x0
179 ; GCN-NEXT: s_wait_kmcnt 0x0
180 ; GCN-NEXT: v_mov_b32_e32 v2, s0
181 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
183 %ld = load i16, ptr addrspace(4) %in
184 %sext = sext i16 %ld to i32
185 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i16 load from %in advanced by -100 i16 elements (-200 bytes).
; For both instruction selectors the assertions expect the negative offset to
; be added to the base pointer with scalar adds (low half 0xff38 / 0xffffff38,
; high half -1) and the s_load_i16 itself to use an immediate offset of 0x0.
189 define amdgpu_ps void @test_s_load_i16_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
190 ; DAG-LABEL: test_s_load_i16_imm:
192 ; DAG-NEXT: s_movk_i32 s2, 0xff38
193 ; DAG-NEXT: s_mov_b32 s3, -1
194 ; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
195 ; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
196 ; DAG-NEXT: s_load_i16 s0, s[0:1], 0x0
197 ; DAG-NEXT: s_wait_kmcnt 0x0
198 ; DAG-NEXT: v_mov_b32_e32 v2, s0
199 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
202 ; GISEL-LABEL: test_s_load_i16_imm:
204 ; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff38
205 ; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
206 ; GISEL-NEXT: s_load_i16 s0, s[0:1], 0x0
207 ; GISEL-NEXT: s_wait_kmcnt 0x0
208 ; GISEL-NEXT: v_mov_b32_e32 v2, s0
209 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
210 ; GISEL-NEXT: s_endpgm
211 %gep = getelementptr i16, ptr addrspace(4) %in, i64 -100
212 %ld = load i16, ptr addrspace(4) %gep
213 %sext = sext i16 %ld to i32
214 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i16 load at %in + zext(%offset), with %offset passed inreg.
; Note the getelementptr indexes by i8, so %offset is a byte offset and needs
; no scaling; the assertions expect s_load_i16 with s2 as the offset register
; and an immediate of 0x0.
218 define amdgpu_ps void @test_s_load_i16_sgpr(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
219 ; GCN-LABEL: test_s_load_i16_sgpr:
221 ; GCN-NEXT: s_load_i16 s0, s[0:1], s2 offset:0x0
222 ; GCN-NEXT: s_wait_kmcnt 0x0
223 ; GCN-NEXT: v_mov_b32_e32 v2, s0
224 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
226 %zext = zext i32 %offset to i64
227 %gep = getelementptr i8, ptr addrspace(4) %in, i64 %zext
228 %ld = load i16, ptr addrspace(4) %gep
229 %sext = sext i16 %ld to i32
230 store i32 %sext, ptr addrspace(1) %out
; Sign-extending i16 load from %in + 16 i16 elements + zext(%offset) i16
; elements, with %offset passed inreg. Because the index is in i16 units, the
; assertions expect the offset to be shifted left by 1 and added to the base
; pointer with scalar instructions; only the constant part is folded into the
; s_load_i16 as the immediate 0x20.
234 define amdgpu_ps void @test_s_load_i16_sgpr_imm(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
235 ; DAG-LABEL: test_s_load_i16_sgpr_imm:
237 ; DAG-NEXT: s_mov_b32 s3, 0
238 ; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
239 ; DAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
240 ; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
241 ; DAG-NEXT: s_load_i16 s0, s[0:1], 0x20
242 ; DAG-NEXT: s_wait_kmcnt 0x0
243 ; DAG-NEXT: v_mov_b32_e32 v2, s0
244 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
247 ; GISEL-LABEL: test_s_load_i16_sgpr_imm:
249 ; GISEL-NEXT: s_mov_b32 s3, 0
250 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
251 ; GISEL-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
252 ; GISEL-NEXT: s_add_co_u32 s0, s0, s2
253 ; GISEL-NEXT: s_add_co_ci_u32 s1, s1, s3
254 ; GISEL-NEXT: s_load_i16 s0, s[0:1], 0x20
255 ; GISEL-NEXT: s_wait_kmcnt 0x0
256 ; GISEL-NEXT: v_mov_b32_e32 v2, s0
257 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
258 ; GISEL-NEXT: s_endpgm
259 %gep1 = getelementptr i16, ptr addrspace(4) %in, i64 16
260 %zext = zext i32 %offset to i64
261 %gep2 = getelementptr i16, ptr addrspace(4) %gep1, i64 %zext
262 %ld = load i16, ptr addrspace(4) %gep2
263 %sext = sext i16 %ld to i32
264 store i32 %sext, ptr addrspace(1) %out
; Same address computation as test_s_load_i16_sgpr_imm, but %offset is not
; marked inreg. The assertions expect the offset to be shifted left by 1 and
; added to the base with 64-bit vector arithmetic, followed by a
; global_load_i16 with offset:32 instead of a scalar load. The two instruction
; selectors differ only in register assignment and copies.
268 define amdgpu_ps void @test_s_load_i16_divergent(ptr addrspace(4) inreg %in, i32 %offset, ptr addrspace(1) %out) {
269 ; DAG-LABEL: test_s_load_i16_divergent:
271 ; DAG-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, 0
272 ; DAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
273 ; DAG-NEXT: v_lshlrev_b64_e32 v[3:4], 1, v[3:4]
274 ; DAG-NEXT: v_add_co_u32 v3, vcc_lo, s0, v3
275 ; DAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
276 ; DAG-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s1, v4, vcc_lo
277 ; DAG-NEXT: global_load_i16 v0, v[3:4], off offset:32
278 ; DAG-NEXT: s_wait_loadcnt 0x0
279 ; DAG-NEXT: global_store_b32 v[1:2], v0, off
282 ; GISEL-LABEL: test_s_load_i16_divergent:
284 ; GISEL-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v4, v2
285 ; GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v6, s1
286 ; GISEL-NEXT: v_mov_b32_e32 v5, s0
287 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
288 ; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 1, v[0:1]
289 ; GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v5, v0
290 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
291 ; GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v6, v1, vcc_lo
292 ; GISEL-NEXT: global_load_i16 v0, v[0:1], off offset:32
293 ; GISEL-NEXT: s_wait_loadcnt 0x0
294 ; GISEL-NEXT: global_store_b32 v[3:4], v0, off
295 ; GISEL-NEXT: s_endpgm
296 %gep1 = getelementptr i16, ptr addrspace(4) %in, i64 16
297 %zext = zext i32 %offset to i64
298 %gep2 = getelementptr i16, ptr addrspace(4) %gep1, i64 %zext
299 %ld = load i16, ptr addrspace(4) %gep2
300 %sext = sext i16 %ld to i32
301 store i32 %sext, ptr addrspace(1) %out
; Loads an i16 through the inreg addrspace(4) pointer %in, zero-extends it to
; i32 and stores the result to %out. The assertions expect a single s_load_u16
; with an immediate offset of 0x0.
305 define amdgpu_ps void @test_s_load_u16(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
306 ; GCN-LABEL: test_s_load_u16:
308 ; GCN-NEXT: s_load_u16 s0, s[0:1], 0x0
309 ; GCN-NEXT: s_wait_kmcnt 0x0
310 ; GCN-NEXT: v_mov_b32_e32 v2, s0
311 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
313 %ld = load i16, ptr addrspace(4) %in
314 %zext = zext i16 %ld to i32
315 store i32 %zext, ptr addrspace(1) %out
; Zero-extending i16 load from %in advanced by +255 i16 elements (510 bytes).
; The assertions expect the offset to be encoded directly in the load as the
; immediate 0x1fe.
319 define amdgpu_ps void @test_s_load_u16_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
320 ; GCN-LABEL: test_s_load_u16_imm:
322 ; GCN-NEXT: s_load_u16 s0, s[0:1], 0x1fe
323 ; GCN-NEXT: s_wait_kmcnt 0x0
324 ; GCN-NEXT: v_mov_b32_e32 v2, s0
325 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
327 %gep = getelementptr i16, ptr addrspace(4) %in, i64 255
328 %ld = load i16, ptr addrspace(4) %gep
329 %zext = zext i16 %ld to i32
330 store i32 %zext, ptr addrspace(1) %out
; Zero-extending i16 load at %in + zext(%offset), with %offset passed inreg.
; The getelementptr indexes by i8, so %offset is a byte offset; the assertions
; expect s_load_u16 with s2 as the offset register and an immediate of 0x0.
334 define amdgpu_ps void @test_s_load_u16_sgpr(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
335 ; GCN-LABEL: test_s_load_u16_sgpr:
337 ; GCN-NEXT: s_load_u16 s0, s[0:1], s2 offset:0x0
338 ; GCN-NEXT: s_wait_kmcnt 0x0
339 ; GCN-NEXT: v_mov_b32_e32 v2, s0
340 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
342 %zext1 = zext i32 %offset to i64
343 %gep = getelementptr i8, ptr addrspace(4) %in, i64 %zext1
344 %ld = load i16, ptr addrspace(4) %gep
345 %zext2 = zext i16 %ld to i32
346 store i32 %zext2, ptr addrspace(1) %out
; Zero-extending i16 load from %in + 16 i16 elements + zext(%offset) i16
; elements, with %offset passed inreg. The assertions expect the offset to be
; shifted left by 1 and added to the base pointer with scalar instructions;
; only the constant part is folded into the s_load_u16 as the immediate 0x20.
350 define amdgpu_ps void @test_s_load_u16_sgpr_imm(ptr addrspace(4) inreg %in, i32 inreg %offset, ptr addrspace(1) %out) {
351 ; DAG-LABEL: test_s_load_u16_sgpr_imm:
353 ; DAG-NEXT: s_mov_b32 s3, 0
354 ; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
355 ; DAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
356 ; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
357 ; DAG-NEXT: s_load_u16 s0, s[0:1], 0x20
358 ; DAG-NEXT: s_wait_kmcnt 0x0
359 ; DAG-NEXT: v_mov_b32_e32 v2, s0
360 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
363 ; GISEL-LABEL: test_s_load_u16_sgpr_imm:
365 ; GISEL-NEXT: s_mov_b32 s3, 0
366 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
367 ; GISEL-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
368 ; GISEL-NEXT: s_add_co_u32 s0, s0, s2
369 ; GISEL-NEXT: s_add_co_ci_u32 s1, s1, s3
370 ; GISEL-NEXT: s_load_u16 s0, s[0:1], 0x20
371 ; GISEL-NEXT: s_wait_kmcnt 0x0
372 ; GISEL-NEXT: v_mov_b32_e32 v2, s0
373 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
374 ; GISEL-NEXT: s_endpgm
375 %gep1 = getelementptr i16, ptr addrspace(4) %in, i64 16
376 %zext1= zext i32 %offset to i64
377 %gep2 = getelementptr i16, ptr addrspace(4) %gep1, i64 %zext1
378 %ld = load i16, ptr addrspace(4) %gep2
379 %zext2= zext i16 %ld to i32
380 store i32 %zext2, ptr addrspace(1) %out
; Same address computation as test_s_load_u16_sgpr_imm, but %offset is not
; marked inreg. The assertions expect the offset to be shifted left by 1 and
; added to the base with 64-bit vector arithmetic, followed by a
; global_load_u16 with offset:32 instead of a scalar load. The two instruction
; selectors differ only in register assignment and copies.
384 define amdgpu_ps void @test_s_load_u16_divergent(ptr addrspace(4) inreg %in, i32 %offset, ptr addrspace(1) %out) {
385 ; DAG-LABEL: test_s_load_u16_divergent:
387 ; DAG-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, 0
388 ; DAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
389 ; DAG-NEXT: v_lshlrev_b64_e32 v[3:4], 1, v[3:4]
390 ; DAG-NEXT: v_add_co_u32 v3, vcc_lo, s0, v3
391 ; DAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
392 ; DAG-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s1, v4, vcc_lo
393 ; DAG-NEXT: global_load_u16 v0, v[3:4], off offset:32
394 ; DAG-NEXT: s_wait_loadcnt 0x0
395 ; DAG-NEXT: global_store_b32 v[1:2], v0, off
398 ; GISEL-LABEL: test_s_load_u16_divergent:
400 ; GISEL-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v4, v2
401 ; GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v6, s1
402 ; GISEL-NEXT: v_mov_b32_e32 v5, s0
403 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
404 ; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 1, v[0:1]
405 ; GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v5, v0
406 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
407 ; GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v6, v1, vcc_lo
408 ; GISEL-NEXT: global_load_u16 v0, v[0:1], off offset:32
409 ; GISEL-NEXT: s_wait_loadcnt 0x0
410 ; GISEL-NEXT: global_store_b32 v[3:4], v0, off
411 ; GISEL-NEXT: s_endpgm
412 %gep1 = getelementptr i16, ptr addrspace(4) %in, i64 16
413 %zext1= zext i32 %offset to i64
414 %gep2 = getelementptr i16, ptr addrspace(4) %gep1, i64 %zext1
415 %ld = load i16, ptr addrspace(4) %gep2
416 %zext2= zext i16 %ld to i32
417 store i32 %zext2, ptr addrspace(1) %out
; Calls llvm.amdgcn.s.buffer.load.i8 on the inreg <4 x i32> %src with a
; constant offset of 4 and a third argument of 0, sign-extends the i8 result
; to i32 and stores it to %out. The assertions expect s_buffer_load_i8 with the
; immediate 0x4.
421 define amdgpu_ps void @s_buffer_load_byte_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out) {
422 ; GCN-LABEL: s_buffer_load_byte_imm_offset:
423 ; GCN: ; %bb.0: ; %main_body
424 ; GCN-NEXT: s_buffer_load_i8 s0, s[0:3], 0x4
425 ; GCN-NEXT: s_wait_kmcnt 0x0
426 ; GCN-NEXT: v_mov_b32_e32 v2, s0
427 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
430 %ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 4, i32 0)
431 %sext = sext i8 %ld to i32
432 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i8 whose offset operand is the inreg i32
; %offset. The assertions expect s_buffer_load_i8 with s4 as the offset
; register and an immediate of 0x0.
436 define amdgpu_ps void @s_buffer_load_byte_sgpr(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %offset) {
437 ; GCN-LABEL: s_buffer_load_byte_sgpr:
438 ; GCN: ; %bb.0: ; %main_body
439 ; GCN-NEXT: s_buffer_load_i8 s0, s[0:3], s4 offset:0x0
440 ; GCN-NEXT: s_wait_kmcnt 0x0
441 ; GCN-NEXT: v_mov_b32_e32 v2, s0
442 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
445 %ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
446 %sext = sext i8 %ld to i32
447 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i8 whose offset is the inreg i32 %in plus the
; constant 100 (add marked nuw nsw). The assertions expect the add to fold
; into the load: s4 as the offset register plus an immediate of 0x64.
451 define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %in) {
452 ; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset:
453 ; GCN: ; %bb.0: ; %main_body
454 ; GCN-NEXT: s_buffer_load_i8 s0, s[0:3], s4 offset:0x64
455 ; GCN-NEXT: s_wait_kmcnt 0x0
456 ; GCN-NEXT: v_mov_b32_e32 v2, s0
457 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
460 %off = add nuw nsw i32 %in, 100
461 %ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %off, i32 0)
462 %sext = sext i8 %ld to i32
463 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i8 whose offset operand %offset is not marked
; inreg. The SelectionDAG assertions expect a vector buffer_load_i8 with
; "offen" addressing instead of a scalar buffer load.
; NOTE(review): the GlobalISel assertions show a full buffer_load_b32 whose
; result is stored with no visible sign-extension of the low byte - confirm
; this reflects intended codegen rather than a known selector limitation.
467 define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
468 ; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
469 ; DAG: ; %bb.0: ; %main_body
470 ; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
471 ; DAG-NEXT: s_wait_loadcnt 0x0
472 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
475 ; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
476 ; GISEL: ; %bb.0: ; %main_body
477 ; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
478 ; GISEL-NEXT: s_wait_loadcnt 0x0
479 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
480 ; GISEL-NEXT: s_endpgm
482 %ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
483 %sext = sext i8 %ld to i32
484 store i32 %sext, ptr addrspace(1) %out
; Calls llvm.amdgcn.s.buffer.load.u8 on the inreg <4 x i32> %src with a
; constant offset of 4 and a third argument of 0, zero-extends the i8 result
; to i32 and stores it to %out. The assertions expect s_buffer_load_u8 with the
; immediate 0x4, followed by an s_and_b32 with 0xff on the loaded value.
488 define amdgpu_ps void @s_buffer_load_ubyte_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out) {
489 ; GCN-LABEL: s_buffer_load_ubyte_imm_offset:
490 ; GCN: ; %bb.0: ; %main_body
491 ; GCN-NEXT: s_buffer_load_u8 s0, s[0:3], 0x4
492 ; GCN-NEXT: s_wait_kmcnt 0x0
493 ; GCN-NEXT: s_and_b32 s0, s0, 0xff
494 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
495 ; GCN-NEXT: v_mov_b32_e32 v2, s0
496 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
499 %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 4, i32 0)
500 %zext = zext i8 %ld to i32
501 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u8 whose offset operand is the inreg i32
; %offset. The assertions expect s_buffer_load_u8 with s4 as the offset
; register and an immediate of 0x0, followed by an s_and_b32 with 0xff.
505 define amdgpu_ps void @s_buffer_load_ubyte_sgpr(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %offset) {
506 ; GCN-LABEL: s_buffer_load_ubyte_sgpr:
507 ; GCN: ; %bb.0: ; %main_body
508 ; GCN-NEXT: s_buffer_load_u8 s0, s[0:3], s4 offset:0x0
509 ; GCN-NEXT: s_wait_kmcnt 0x0
510 ; GCN-NEXT: s_and_b32 s0, s0, 0xff
511 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
512 ; GCN-NEXT: v_mov_b32_e32 v2, s0
513 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
516 %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
517 %zext = zext i8 %ld to i32
518 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u8 whose offset is the inreg i32 %in plus the
; constant 100 (add marked nuw nsw). The assertions expect the add to fold
; into the load (s4 plus an immediate of 0x64), followed by an s_and_b32 with
; 0xff.
522 define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %in) {
523 ; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset:
524 ; GCN: ; %bb.0: ; %main_body
525 ; GCN-NEXT: s_buffer_load_u8 s0, s[0:3], s4 offset:0x64
526 ; GCN-NEXT: s_wait_kmcnt 0x0
527 ; GCN-NEXT: s_and_b32 s0, s0, 0xff
528 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
529 ; GCN-NEXT: v_mov_b32_e32 v2, s0
530 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
533 %off = add nuw nsw i32 %in, 100
534 %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %off, i32 0)
535 %zext = zext i8 %ld to i32
536 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u8 whose offset operand %offset is not marked
; inreg. The SelectionDAG assertions expect a vector buffer_load_u8 with
; "offen" addressing; the GlobalISel assertions expect a buffer_load_b32
; followed by a v_and_b32 with 0xff to keep only the low byte.
540 define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
541 ; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
542 ; DAG: ; %bb.0: ; %main_body
543 ; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
544 ; DAG-NEXT: s_wait_loadcnt 0x0
545 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
548 ; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
549 ; GISEL: ; %bb.0: ; %main_body
550 ; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
551 ; GISEL-NEXT: s_wait_loadcnt 0x0
552 ; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
553 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
554 ; GISEL-NEXT: s_endpgm
556 %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
557 %zext = zext i8 %ld to i32
558 store i32 %zext, ptr addrspace(1) %out
; Calls llvm.amdgcn.s.buffer.load.i16 on the inreg <4 x i32> %src with a
; constant offset of 4 and a third argument of 0, sign-extends the i16 result
; to i32 and stores it to %out. The assertions expect s_buffer_load_i16 with
; the immediate 0x4.
562 define amdgpu_ps void @s_buffer_load_short_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out) {
563 ; GCN-LABEL: s_buffer_load_short_imm_offset:
564 ; GCN: ; %bb.0: ; %main_body
565 ; GCN-NEXT: s_buffer_load_i16 s0, s[0:3], 0x4
566 ; GCN-NEXT: s_wait_kmcnt 0x0
567 ; GCN-NEXT: v_mov_b32_e32 v2, s0
568 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
571 %ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 4, i32 0)
572 %sext = sext i16 %ld to i32
573 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i16 whose offset operand is the inreg i32
; %offset. The assertions expect s_buffer_load_i16 with s4 as the offset
; register and an immediate of 0x0.
577 define amdgpu_ps void @s_buffer_load_short_sgpr(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %offset) {
578 ; GCN-LABEL: s_buffer_load_short_sgpr:
579 ; GCN: ; %bb.0: ; %main_body
580 ; GCN-NEXT: s_buffer_load_i16 s0, s[0:3], s4 offset:0x0
581 ; GCN-NEXT: s_wait_kmcnt 0x0
582 ; GCN-NEXT: v_mov_b32_e32 v2, s0
583 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
586 %ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
587 %sext = sext i16 %ld to i32
588 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i16 whose offset is the inreg i32 %in plus the
; constant 100 (add marked nuw nsw). The assertions expect the add to fold
; into the load: s4 as the offset register plus an immediate of 0x64.
592 define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %in) {
593 ; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset:
594 ; GCN: ; %bb.0: ; %main_body
595 ; GCN-NEXT: s_buffer_load_i16 s0, s[0:3], s4 offset:0x64
596 ; GCN-NEXT: s_wait_kmcnt 0x0
597 ; GCN-NEXT: v_mov_b32_e32 v2, s0
598 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
601 %off = add nuw nsw i32 %in, 100
602 %ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %off, i32 0)
603 %sext = sext i16 %ld to i32
604 store i32 %sext, ptr addrspace(1) %out
; Sign-extending s.buffer.load.i16 whose offset operand %offset is not marked
; inreg. The SelectionDAG assertions expect a vector buffer_load_i16 with
; "offen" addressing instead of a scalar buffer load.
; NOTE(review): the GlobalISel assertions show a full buffer_load_b32 whose
; result is stored with no visible sign-extension of the low 16 bits - confirm
; this reflects intended codegen rather than a known selector limitation.
608 define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
609 ; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
610 ; DAG: ; %bb.0: ; %main_body
611 ; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
612 ; DAG-NEXT: s_wait_loadcnt 0x0
613 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
616 ; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
617 ; GISEL: ; %bb.0: ; %main_body
618 ; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
619 ; GISEL-NEXT: s_wait_loadcnt 0x0
620 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
621 ; GISEL-NEXT: s_endpgm
623 %ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
624 %sext = sext i16 %ld to i32
625 store i32 %sext, ptr addrspace(1) %out
; Calls llvm.amdgcn.s.buffer.load.u16 on the inreg <4 x i32> %src with a
; constant offset of 4 and a third argument of 0, zero-extends the i16 result
; to i32 and stores it to %out. The assertions expect s_buffer_load_u16 with
; the immediate 0x4, followed by an s_and_b32 with 0xffff on the loaded value.
629 define amdgpu_ps void @s_buffer_load_ushort_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out) {
630 ; GCN-LABEL: s_buffer_load_ushort_imm_offset:
631 ; GCN: ; %bb.0: ; %main_body
632 ; GCN-NEXT: s_buffer_load_u16 s0, s[0:3], 0x4
633 ; GCN-NEXT: s_wait_kmcnt 0x0
634 ; GCN-NEXT: s_and_b32 s0, s0, 0xffff
635 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
636 ; GCN-NEXT: v_mov_b32_e32 v2, s0
637 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
640 %ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 4, i32 0)
641 %zext = zext i16 %ld to i32
642 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u16 whose offset operand is the inreg i32
; %offset. The assertions expect s_buffer_load_u16 with s4 as the offset
; register and an immediate of 0x0, followed by an s_and_b32 with 0xffff.
646 define amdgpu_ps void @s_buffer_load_ushort_sgpr(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %offset) {
647 ; GCN-LABEL: s_buffer_load_ushort_sgpr:
648 ; GCN: ; %bb.0: ; %main_body
649 ; GCN-NEXT: s_buffer_load_u16 s0, s[0:3], s4 offset:0x0
650 ; GCN-NEXT: s_wait_kmcnt 0x0
651 ; GCN-NEXT: s_and_b32 s0, s0, 0xffff
652 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
653 ; GCN-NEXT: v_mov_b32_e32 v2, s0
654 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
657 %ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
658 %zext = zext i16 %ld to i32
659 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u16 whose offset is the inreg i32 %in plus the
; constant 100 (add marked nuw nsw). The assertions expect the add to fold
; into the load (s4 plus an immediate of 0x64), followed by an s_and_b32 with
; 0xffff.
663 define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 inreg %in) {
664 ; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset:
665 ; GCN: ; %bb.0: ; %main_body
666 ; GCN-NEXT: s_buffer_load_u16 s0, s[0:3], s4 offset:0x64
667 ; GCN-NEXT: s_wait_kmcnt 0x0
668 ; GCN-NEXT: s_and_b32 s0, s0, 0xffff
669 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
670 ; GCN-NEXT: v_mov_b32_e32 v2, s0
671 ; GCN-NEXT: global_store_b32 v[0:1], v2, off
674 %off = add nuw nsw i32 %in, 100
675 %ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %off, i32 0)
676 %zext = zext i16 %ld to i32
677 store i32 %zext, ptr addrspace(1) %out
; Zero-extending s.buffer.load.u16 whose offset operand %offset is not marked
; inreg. The SelectionDAG assertions expect a vector buffer_load_u16 with
; "offen" addressing; the GlobalISel assertions expect a buffer_load_b32
; followed by a v_and_b32 with 0xffff to keep only the low 16 bits.
681 define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
682 ; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
683 ; DAG: ; %bb.0: ; %main_body
684 ; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
685 ; DAG-NEXT: s_wait_loadcnt 0x0
686 ; DAG-NEXT: global_store_b32 v[0:1], v2, off
689 ; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
690 ; GISEL: ; %bb.0: ; %main_body
691 ; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
692 ; GISEL-NEXT: s_wait_loadcnt 0x0
693 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
694 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off
695 ; GISEL-NEXT: s_endpgm
697 %ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
698 %zext = zext i16 %ld to i32
699 store i32 %zext, ptr addrspace(1) %out
; Declarations of the four s.buffer.load intrinsics called by the
; s_buffer_load_* tests in this file. Each takes (<4 x i32>, i32, i32); the
; tests pass the buffer value %src, an offset, and a constant 0 in that order.
; The .i8/.u8 variants return i8 and the .i16/.u16 variants return i16.
703 declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32)
704 declare i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32>, i32, i32)
705 declare i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32>, i32, i32)
706 declare i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32>, i32, i32)