1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
6 ; TODO: Merge this GlobalISel test with the SelectionDAG test
; Exercises llvm.amdgcn.is.shared on a pointer that is loaded per work-item.
; The kernel loads an i8* (volatile) from the global array %ptr.ptr at the
; index given by the workitem id, queries the intrinsic, zero-extends the i1
; result to i32 and stores it to an undef global address.
; In the expected output the second register of the loaded pair (v1,
; presumably the high 32 bits of the pointer) is compared with v_cmp_eq_u32
; against a scalar value. For hawaii that scalar is loaded from s[4:5] at
; offset 0x10; for gfx900 and gfx1010 it is read with s_getreg_b32 from
; HW_REG_SH_MEM_BASES and shifted left by 16.
; NOTE(review): the scalar looks like the shared-memory aperture base - confirm
; against the AMDGPU backend documentation.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
8 define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) {
9 ; CI-LABEL: is_local_vgpr:
11 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
12 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
13 ; CI-NEXT: s_waitcnt lgkmcnt(0)
14 ; CI-NEXT: v_mov_b32_e32 v0, s0
15 ; CI-NEXT: v_mov_b32_e32 v1, s1
16 ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
17 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
18 ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
19 ; CI-NEXT: s_waitcnt vmcnt(0)
20 ; CI-NEXT: s_load_dword s0, s[4:5], 0x10
21 ; CI-NEXT: s_waitcnt lgkmcnt(0)
22 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
23 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
24 ; CI-NEXT: flat_store_dword v[0:1], v0
27 ; GFX9-LABEL: is_local_vgpr:
29 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
30 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
31 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
32 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
33 ; GFX9-NEXT: s_waitcnt vmcnt(0)
34 ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
35 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
36 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
37 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
38 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
41 ; GFX10-LABEL: is_local_vgpr:
43 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
44 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
45 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
46 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
47 ; GFX10-NEXT: s_waitcnt vmcnt(0)
48 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
49 ; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
50 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16
51 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
52 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
53 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
54 ; GFX10-NEXT: s_endpgm
; Index the pointer array with the work-item id so each lane loads its own
; pointer value.
55 %id = call i32 @llvm.amdgcn.workitem.id.x()
56 %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id
; Volatile load of the pointer that is handed to the intrinsic.
57 %ptr = load volatile i8*, i8* addrspace(1)* %gep
58 %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
; Widen the i1 result to i32 so it can be stored.
59 %ext = zext i1 %val to i32
; The destination address is undef; the test only checks the emitted code.
60 store i32 %ext, i32 addrspace(1)* undef
; Exercises llvm.amdgcn.is.shared on a pointer passed as a kernel argument,
; with the i1 result used directly as a branch condition.
; The expected output does the comparison on the scalar unit (s_cmp_lg_u32 of
; s1 against the scalar base value) and uses s_cbranch_scc1 to jump over the
; %bb0 code to %bb1 when the two values differ.
; The base value comes from the same sources as in the expected output: a
; load from s[4:5] at offset 0x10 for hawaii, and s_getreg_b32 on
; HW_REG_SH_MEM_BASES shifted left by 16 for gfx900 and gfx1010.
; In the expected output %bb0 contains the volatile store of 0 to an undef
; global address.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
64 define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
65 ; CI-LABEL: is_local_sgpr:
67 ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
68 ; CI-NEXT: s_waitcnt lgkmcnt(0)
69 ; CI-NEXT: s_load_dword s0, s[4:5], 0x10
70 ; CI-NEXT: s_waitcnt lgkmcnt(0)
71 ; CI-NEXT: s_cmp_lg_u32 s1, s0
72 ; CI-NEXT: s_cbranch_scc1 .LBB1_2
73 ; CI-NEXT: ; %bb.1: ; %bb0
74 ; CI-NEXT: v_mov_b32_e32 v0, 0
75 ; CI-NEXT: flat_store_dword v[0:1], v0
76 ; CI-NEXT: s_waitcnt vmcnt(0)
77 ; CI-NEXT: .LBB1_2: ; %bb1
80 ; GFX9-LABEL: is_local_sgpr:
82 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
83 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
84 ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
85 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
86 ; GFX9-NEXT: s_cmp_lg_u32 s1, s0
87 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
88 ; GFX9-NEXT: ; %bb.1: ; %bb0
89 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
90 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
91 ; GFX9-NEXT: s_waitcnt vmcnt(0)
92 ; GFX9-NEXT: .LBB1_2: ; %bb1
95 ; GFX10-LABEL: is_local_sgpr:
97 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
98 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
99 ; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
100 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16
101 ; GFX10-NEXT: s_cmp_lg_u32 s1, s0
102 ; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
103 ; GFX10-NEXT: ; %bb.1: ; %bb0
104 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
105 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
106 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
107 ; GFX10-NEXT: .LBB1_2: ; %bb1
108 ; GFX10-NEXT: s_endpgm
; Query the kernel-argument pointer and branch on the result.
109 %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
110 br i1 %val, label %bb0, label %bb1
113 store volatile i32 0, i32 addrspace(1)* undef
; Declarations of the two intrinsics called by the kernels in this file; both
; use attribute group #0, defined on the last line.
120 declare i32 @llvm.amdgcn.workitem.id.x() #0
121 declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #0
123 attributes #0 = { nounwind readnone speculatable }