1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
7 ; TODO: Merge with DAG test
; is_private_vgpr: each work-item loads a plain `ptr` from the pointer array
; %ptr.ptr, indexed by llvm.amdgcn.workitem.id.x, and queries
; llvm.amdgcn.is.private on it. The i1 result is zero-extended to i32 and
; stored.
;
; Per the check lines below, the loaded pointer sits in v[0:1] and the test is
; a VALU compare (v_cmp_eq_u32) on its high dword, v1. The hawaii run compares
; against a dword fetched with s_load_dword from s[4:5] at offset 0x11, while
; the gfx900, gfx1010 and gfx1100 runs compare against the high half of a copy
; of src_private_base.
9 define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
10 ; CI-LABEL: is_private_vgpr:
12 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
13 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
14 ; CI-NEXT: s_waitcnt lgkmcnt(0)
15 ; CI-NEXT: v_mov_b32_e32 v0, s0
16 ; CI-NEXT: v_mov_b32_e32 v1, s1
17 ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
18 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
19 ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
20 ; CI-NEXT: s_waitcnt vmcnt(0)
21 ; CI-NEXT: s_load_dword s0, s[4:5], 0x11
22 ; CI-NEXT: s_waitcnt lgkmcnt(0)
23 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
24 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
25 ; CI-NEXT: flat_store_dword v[0:1], v0
28 ; GFX9-LABEL: is_private_vgpr:
30 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
31 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
33 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
34 ; GFX9-NEXT: s_waitcnt vmcnt(0)
35 ; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
36 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
37 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
38 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
41 ; GFX10-LABEL: is_private_vgpr:
43 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
44 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
45 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
46 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
47 ; GFX10-NEXT: s_waitcnt vmcnt(0)
48 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
49 ; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
50 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
51 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
52 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
53 ; GFX10-NEXT: s_endpgm
55 ; GFX11-LABEL: is_private_vgpr:
57 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
58 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
59 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
60 ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
61 ; GFX11-NEXT: s_waitcnt vmcnt(0)
62 ; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
63 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
64 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
65 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
66 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off
68 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
69 ; GFX11-NEXT: s_endpgm
; Index the pointer array by the work-item id, then load the pointer to be
; queried with a volatile load.
70 %id = call i32 @llvm.amdgcn.workitem.id.x()
71 %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
72 %ptr = load volatile ptr, ptr addrspace(1) %gep
73 %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
; Widen the i1 answer to i32 and write it through an undef addrspace(1)
; pointer.
74 %ext = zext i1 %val to i32
75 store i32 %ext, ptr addrspace(1) undef
; is_private_sgpr: the pointer queried with llvm.amdgcn.is.private is the
; kernel argument %ptr itself, and the i1 result drives a conditional branch
; to %bb0 / %bb1 instead of being stored.
;
; Per the check lines below, every run keeps the test scalar: s_cmp_lg_u32 on
; the pointer's high dword (s1), then s_cbranch_scc1 to skip the store that
; the checks list under %bb0. The hawaii run compares against a dword fetched
; with s_load_dword from s[4:5] at offset 0x11; the gfx900, gfx1010 and
; gfx1100 runs compare against s3, the high half of the src_private_base copy
; held in s[2:3].
79 define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
80 ; CI-LABEL: is_private_sgpr:
82 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
83 ; CI-NEXT: s_waitcnt lgkmcnt(0)
84 ; CI-NEXT: s_load_dword s0, s[4:5], 0x11
85 ; CI-NEXT: s_waitcnt lgkmcnt(0)
86 ; CI-NEXT: s_cmp_lg_u32 s1, s0
87 ; CI-NEXT: s_cbranch_scc1 .LBB1_2
88 ; CI-NEXT: ; %bb.1: ; %bb0
89 ; CI-NEXT: v_mov_b32_e32 v0, 0
90 ; CI-NEXT: flat_store_dword v[0:1], v0
91 ; CI-NEXT: s_waitcnt vmcnt(0)
92 ; CI-NEXT: .LBB1_2: ; %bb1
95 ; GFX9-LABEL: is_private_sgpr:
97 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
98 ; GFX9-NEXT: s_mov_b64 s[2:3], src_private_base
99 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
100 ; GFX9-NEXT: s_cmp_lg_u32 s1, s3
101 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
102 ; GFX9-NEXT: ; %bb.1: ; %bb0
103 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
104 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
105 ; GFX9-NEXT: s_waitcnt vmcnt(0)
106 ; GFX9-NEXT: .LBB1_2: ; %bb1
107 ; GFX9-NEXT: s_endpgm
109 ; GFX10-LABEL: is_private_sgpr:
111 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
112 ; GFX10-NEXT: s_mov_b64 s[2:3], src_private_base
113 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
114 ; GFX10-NEXT: s_cmp_lg_u32 s1, s3
115 ; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
116 ; GFX10-NEXT: ; %bb.1: ; %bb0
117 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
119 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
120 ; GFX10-NEXT: .LBB1_2: ; %bb1
121 ; GFX10-NEXT: s_endpgm
123 ; GFX11-LABEL: is_private_sgpr:
125 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
126 ; GFX11-NEXT: s_mov_b64 s[2:3], src_private_base
127 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX11-NEXT: s_cmp_lg_u32 s1, s3
129 ; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
130 ; GFX11-NEXT: ; %bb.1: ; %bb0
131 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
132 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
133 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
134 ; GFX11-NEXT: .LBB1_2: ; %bb1
135 ; GFX11-NEXT: s_nop 0
136 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
137 ; GFX11-NEXT: s_endpgm
; Query the kernel-argument pointer directly and branch on the answer.
138 %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
139 br i1 %val, label %bb0, label %bb1
; Volatile store of 0 through an undef addrspace(1) pointer; the check lines
; above list it under the %bb0 block.
142 store volatile i32 0, ptr addrspace(1) undef
; Intrinsics called by the kernels in this file: workitem.id.x returns an i32
; and is.private maps a `ptr` to an i1. Both use attribute group #0.
149 declare i32 @llvm.amdgcn.workitem.id.x() #0
150 declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
; Attribute group shared by both intrinsic declarations.
152 attributes #0 = { nounwind readnone speculatable }