1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; FIXME: Test should be redundant with constant-address-space-32bit.ll
7 ; It's important to check with gfx8 and gfx9 to cover access through flat and global instructions, respectively.
9 ; Custom lowering needs to swap out the MMO address space
; Loads a float from addrspace(6) at a null base indexed by %arg, a non-inreg
; i32 that arrives in v0 in the checks below.
; Expected code: v0 is shifted left by 2 and v1 is set to 0 to form the v[0:1]
; address pair, which is then read with buffer_load_dword (addr64) on tahiti,
; flat_load_dword on gfx803 and global_load_dword on gfx900.
10 define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) {
11 ; GFX6-LABEL: load_constant32bit_vgpr_offset:
12 ; GFX6: ; %bb.0: ; %entry
13 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
14 ; GFX6-NEXT: s_mov_b32 s2, 0
15 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
16 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
17 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
18 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
19 ; GFX6-NEXT: s_waitcnt vmcnt(0)
20 ; GFX6-NEXT: ; return to shader part epilog
22 ; GFX8-LABEL: load_constant32bit_vgpr_offset:
23 ; GFX8: ; %bb.0: ; %entry
24 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
25 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
26 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
27 ; GFX8-NEXT: s_waitcnt vmcnt(0)
28 ; GFX8-NEXT: ; return to shader part epilog
30 ; GFX9-LABEL: load_constant32bit_vgpr_offset:
31 ; GFX9: ; %bb.0: ; %entry
32 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
33 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
34 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
35 ; GFX9-NEXT: s_waitcnt vmcnt(0)
36 ; GFX9-NEXT: ; return to shader part epilog
38 %gep = getelementptr <{ [4294967295 x float] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg
39 %load = load float, ptr addrspace(6) %gep, align 4
; Loads an i32 from addrspace(6) at a null base indexed by an inreg i32 %arg.
; The checks are shared by all three RUN lines: s0 is shifted left by 2, s1 is
; zeroed, and the value is read with s_load_dword from the s[0:1] pair.
43 define amdgpu_ps i32 @load_constant32bit_sgpr_offset(i32 inreg %arg) {
44 ; GCN-LABEL: load_constant32bit_sgpr_offset:
45 ; GCN: ; %bb.0: ; %entry
46 ; GCN-NEXT: s_lshl_b32 s0, s0, 2
47 ; GCN-NEXT: s_mov_b32 s1, 0
48 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
49 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
50 ; GCN-NEXT: ; return to shader part epilog
52 %gep = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg
53 %load = load i32, ptr addrspace(6) %gep, align 4
57 ; This gets split during regbankselect
; Loads <8 x float> (align 32) directly through a non-inreg ptr addrspace(6)
; argument. The checks expect two dwordx4 loads 16 bytes apart: tahiti and
; gfx900 encode the 16 as an instruction offset, while gfx803 computes the
; second address with v_add_u32 / v_addc_u32 before another flat_load_dwordx4.
58 define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %arg) {
59 ; GFX6-LABEL: load_constant32bit_vgpr_v8f32:
60 ; GFX6: ; %bb.0: ; %entry
61 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
62 ; GFX6-NEXT: s_mov_b32 s2, 0
63 ; GFX6-NEXT: v_mov_b32_e32 v5, 0
64 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
65 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
66 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
67 ; GFX6-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[0:3], 0 addr64 offset:16
68 ; GFX6-NEXT: s_waitcnt vmcnt(0)
69 ; GFX6-NEXT: ; return to shader part epilog
71 ; GFX8-LABEL: load_constant32bit_vgpr_v8f32:
72 ; GFX8: ; %bb.0: ; %entry
73 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
74 ; GFX8-NEXT: v_mov_b32_e32 v5, 0
75 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[4:5]
76 ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 16, v4
77 ; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
78 ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
79 ; GFX8-NEXT: s_waitcnt vmcnt(0)
80 ; GFX8-NEXT: ; return to shader part epilog
82 ; GFX9-LABEL: load_constant32bit_vgpr_v8f32:
83 ; GFX9: ; %bb.0: ; %entry
84 ; GFX9-NEXT: v_mov_b32_e32 v8, v0
85 ; GFX9-NEXT: v_mov_b32_e32 v9, 0
86 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
87 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16
88 ; GFX9-NEXT: s_waitcnt vmcnt(0)
89 ; GFX9-NEXT: ; return to shader part epilog
91 %load = load <8 x float>, ptr addrspace(6) %arg, align 32