; RUN: llc -mtriple=amdgcn--amdpal -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; `and` of a VALU value with groupstaticsize: the checks expect the whole
; expression to fold to a literal 0 moved into the result register.
; GCN-LABEL: {{^}}fold_mi_v_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}
; Same fold as above but with an SALU operand: still expects a 0 immediate.
; GCN-LABEL: {{^}}fold_mi_s_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}
; `or` with the folded-to-0 groupstaticsize: the checks expect the mbcnt
; result to be stored directly, with no extra instruction writing it.
; GCN-LABEL: {{^}}fold_mi_v_or_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; `or` with 0 on an SALU value: expect the loaded scalar to be copied to a
; VGPR and stored unchanged.
; GCN-LABEL: {{^}}fold_mi_s_or_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; `xor` with 0 on a VALU value: expect the mbcnt result stored directly.
; GCN-LABEL: {{^}}fold_mi_v_xor_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}
; `xor` with 0 on an SALU value: expect a straight scalar-load / copy / store.
; GCN-LABEL: {{^}}fold_mi_s_xor_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}
; `not` of the folded-to-0 groupstaticsize: expect a -1 immediate directly.
; GCN-LABEL: {{^}}fold_mi_s_not_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, -1
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}
; i64 `not` of a ctpop: the low half is a real v_not of the popcount, while
; the high half (not of the always-0 upper popcount bits) folds to -1.
; GCN-LABEL: {{^}}fold_mi_v_not_0:
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
  %vreg = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
  %xor = xor i64 %ctpop, -1
  store i64 %xor, i64 addrspace(1)* %out
  ret void
}
; The neg1 appears after folding the not 0
; GCN-LABEL: {{^}}fold_mi_or_neg1:
; GCN: buffer_load_dwordx2
; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}

; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %or = or i64 %xor, %vreg1
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; i64 (not ctpop) & x: high half of the `not` folds to -1, which is the
; identity for `and`, so only the low half needs real not/and instructions.
; GCN-LABEL: {{^}}fold_mi_and_neg1:
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_not_b32
; GCN: v_and_b32
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %and = and i64 %xor, %vreg1
  store i64 %and, i64 addrspace(1)* %out
  ret void
}
declare i64 @llvm.ctpop.i64(i64) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.groupstaticsize() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }