1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
7 ; TODO: Merge with the SelectionDAG version of this llvm.amdgcn.is.shared test
; Test: llvm.amdgcn.is.shared applied to a pointer that is loaded per
; work-item. The IR indexes %ptr.ptr by the x work-item id, volatile-loads a
; pointer from that slot, queries the intrinsic, and stores the zero-extended
; i1 result through an undef global pointer.
;
; In the expected code for every target the loaded pointer ends up in v[0:1],
; and the query is a v_cmp_eq_u32 of its high dword (v1) against an SGPR,
; turned into 0/1 by v_cndmask_b32.
9 define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; hawaii checks: the compared SGPR (s2) is loaded from s[6:7] at offset 0x33
; (presumably the shared aperture value - TODO confirm).
10 ; CI-LABEL: is_local_vgpr:
12 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
13 ; CI-NEXT: s_load_dword s2, s[6:7], 0x33
14 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
15 ; CI-NEXT: s_waitcnt lgkmcnt(0)
16 ; CI-NEXT: v_mov_b32_e32 v0, s0
17 ; CI-NEXT: v_mov_b32_e32 v1, s1
18 ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
19 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
20 ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
21 ; CI-NEXT: s_waitcnt vmcnt(0)
22 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1
23 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
24 ; CI-NEXT: flat_store_dword v[0:1], v0
; gfx900 checks: no extra scalar load; src_shared_base is moved into s[0:1]
; and the high half (s1) is the value compared against v1.
27 ; GFX9-LABEL: is_local_vgpr:
29 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
30 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
31 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
32 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
33 ; GFX9-NEXT: s_waitcnt vmcnt(0)
34 ; GFX9-NEXT: s_mov_b64 s[0:1], src_shared_base
35 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
36 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
37 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
; gfx1010 checks: same src_shared_base sequence as gfx900, with the compare
; result held in vcc_lo and an s_waitcnt_depctr before the s_mov_b64.
40 ; GFX10-LABEL: is_local_vgpr:
42 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
43 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
44 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
45 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
46 ; GFX10-NEXT: s_waitcnt vmcnt(0)
47 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
48 ; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
49 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
50 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
51 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
52 ; GFX10-NEXT: s_endpgm
; gfx1100 checks: the pointer array base is loaded from s[2:3] rather than
; s[6:7], and v0 is masked with 0x3ff before being scaled by 8.
54 ; GFX11-LABEL: is_local_vgpr:
56 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
57 ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
58 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
59 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
60 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
61 ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
62 ; GFX11-NEXT: s_waitcnt vmcnt(0)
63 ; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
64 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
65 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
66 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off
68 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
69 ; GFX11-NEXT: s_endpgm
; Per-work-item index into the pointer array.
70 %id = call i32 @llvm.amdgcn.workitem.id.x()
71 %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
; Volatile load of the pointer under test (plain `ptr`, default address space).
72 %ptr = load volatile ptr, ptr addrspace(1) %gep
73 %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
; Widen the i1 so the result can be stored as a dword.
74 %ext = zext i1 %val to i32
; The destination address is undef; the checks only pin the store instruction.
75 store i32 %ext, ptr addrspace(1) undef
; Test: llvm.amdgcn.is.shared applied to a pointer passed directly as a
; kernel argument, with the i1 result used as a branch condition.
;
; In the expected code for every target the pointer is loaded into s[0:1] and
; the query is a scalar s_cmp_lg_u32 on its high dword (s1); s_cbranch_scc1
; then jumps past the %bb0 store to %bb1 when the two values differ.
79 define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; hawaii checks: the value compared against s1 is loaded from s[6:7] at
; offset 0x33 into s0, after the pointer load has been waited on, so it
; overwrites the pointer's low dword (presumably the shared aperture value -
; TODO confirm).
80 ; CI-LABEL: is_local_sgpr:
82 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
83 ; CI-NEXT: s_waitcnt lgkmcnt(0)
84 ; CI-NEXT: s_load_dword s0, s[6:7], 0x33
85 ; CI-NEXT: s_waitcnt lgkmcnt(0)
86 ; CI-NEXT: s_cmp_lg_u32 s1, s0
87 ; CI-NEXT: s_cbranch_scc1 .LBB1_2
88 ; CI-NEXT: ; %bb.1: ; %bb0
89 ; CI-NEXT: v_mov_b32_e32 v0, 0
90 ; CI-NEXT: flat_store_dword v[0:1], v0
91 ; CI-NEXT: s_waitcnt vmcnt(0)
92 ; CI-NEXT: .LBB1_2: ; %bb1
; gfx900 checks: src_shared_base is moved into s[2:3] and its high half (s3)
; is compared against s1.
95 ; GFX9-LABEL: is_local_sgpr:
97 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
98 ; GFX9-NEXT: s_mov_b64 s[2:3], src_shared_base
99 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
100 ; GFX9-NEXT: s_cmp_lg_u32 s1, s3
101 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
102 ; GFX9-NEXT: ; %bb.1: ; %bb0
103 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
104 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
105 ; GFX9-NEXT: s_waitcnt vmcnt(0)
106 ; GFX9-NEXT: .LBB1_2: ; %bb1
107 ; GFX9-NEXT: s_endpgm
; gfx1010 checks: same compare as gfx900; the wait after the store is
; s_waitcnt_vscnt instead of s_waitcnt vmcnt(0).
109 ; GFX10-LABEL: is_local_sgpr:
111 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
112 ; GFX10-NEXT: s_mov_b64 s[2:3], src_shared_base
113 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
114 ; GFX10-NEXT: s_cmp_lg_u32 s1, s3
115 ; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
116 ; GFX10-NEXT: ; %bb.1: ; %bb0
117 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
119 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
120 ; GFX10-NEXT: .LBB1_2: ; %bb1
121 ; GFX10-NEXT: s_endpgm
; gfx1100 checks: the pointer is loaded from s[2:3] before that pair is
; reused for src_shared_base, and the store carries the dlc modifier.
123 ; GFX11-LABEL: is_local_sgpr:
125 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
126 ; GFX11-NEXT: s_mov_b64 s[2:3], src_shared_base
127 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX11-NEXT: s_cmp_lg_u32 s1, s3
129 ; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
130 ; GFX11-NEXT: ; %bb.1: ; %bb0
131 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
132 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
133 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
134 ; GFX11-NEXT: .LBB1_2: ; %bb1
135 ; GFX11-NEXT: s_endpgm
; Branch on the intrinsic result: %bb0 when it is true, %bb1 otherwise.
136 %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
137 br i1 %val, label %bb0, label %bb1
; Volatile store of 0 to an undef global address; the expected assembly
; annotates this store's block as %bb0.
140 store volatile i32 0, ptr addrspace(1) undef
; Intrinsics used by the kernels in this file: the x work-item id, and the
; llvm.amdgcn.is.shared query that both kernels exercise.
147 declare i32 @llvm.amdgcn.workitem.id.x() #0
148 declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
; Attribute group shared by both intrinsic declarations.
150 attributes #0 = { nounwind readnone speculatable }
; Module flag setting amdhsa_code_object_version to 500 (presumably selects
; code object v5 - TODO confirm).
152 !llvm.module.flags = !{!0}
153 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}