1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; Unsigned bitfield-extract pattern on per-lane values loaded from memory:
;   (src << (32 - width)) >> (32 - width), with a logical right shift.
; %shl has a single use here, so the checks below expect the shift pair to be
; selected as one v_bfe_u32 with offset 0 and the loaded width as the size.
4 ; GCN-LABEL: {{^}}v_ubfe_sub_i32:
5 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
6 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
7 ; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
8 define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
9 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Index all three buffers by the work-item id so each lane uses its own element.
10 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
11 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
12 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Volatile loads keep the two reads as separate load instructions.
13 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer. Reading %in0.gep again left
; %in1.gep unused and fed the same address to both the source and the width.
14 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount shared by the shl and the lshr.
15 %sub = sub i32 32, %width
16 %shl = shl i32 %src, %sub
17 %bfe = lshr i32 %shl, %sub
18 store i32 %bfe, i32 addrspace(1)* %out.gep
; Same unsigned shl/lshr pattern as the single-use vector test, but %shl is
; also written out by a second (volatile) store. The checks below therefore
; expect the subtract and both shifts to remain as separate instructions:
; v_lshl/v_lshr on the run without -mcpu, and the operand-reversed
; v_lshlrev/v_lshrrev forms on the tonga run.
22 ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
23 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
24 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
25 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
27 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
28 ; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
30 ; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
31 ; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
35 define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
36 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Index all three buffers by the work-item id so each lane uses its own element.
37 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
38 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
39 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Volatile loads keep the two reads as separate load instructions.
40 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer. Reading %in0.gep again left
; %in1.gep unused and fed the same address to both the source and the width.
41 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount shared by the shl and the lshr.
42 %sub = sub i32 32, %width
43 %shl = shl i32 %src, %sub
44 %bfe = lshr i32 %shl, %sub
45 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl: this is what makes the intermediate value multi-use.
46 store volatile i32 %shl, i32 addrspace(1)* undef
; Unsigned shl/lshr extract where the source and the width are kernel
; arguments rather than values loaded per lane. The checks below expect two
; s_load_dword, a v_mov copying the width into a vector register, and a
; v_bfe_u32 that takes the scalar source with offset 0.
50 ; GCN-LABEL: {{^}}s_ubfe_sub_i32:
51 ; GCN: s_load_dword [[SRC:s[0-9]+]]
52 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
53 ; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
54 ; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
55 define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
56 %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the destination depends on the work-item id.
57 %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
; 32 - width is used as the amount for both shifts.
58 %shamt = sub i32 32, %width
59 %hi = shl i32 %src, %shamt
60 %field = lshr i32 %hi, %shamt
61 store i32 %field, i32 addrspace(1)* %dst
; Kernel-argument variant of the unsigned pattern in which the shl result is
; stored a second time. The checks below expect no bitfield-extract: an
; s_sub_i32 immediately followed by s_lshl_b32 and s_lshr_b32.
65 ; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
66 ; GCN: s_load_dword [[SRC:s[0-9]+]]
67 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
68 ; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
69 ; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
70 ; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
71 define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
72 %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the destination depends on the work-item id.
73 %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
; 32 - width is used as the amount for both shifts.
74 %shamt = sub i32 32, %width
75 %hi = shl i32 %src, %shamt
76 %field = lshr i32 %hi, %shamt
77 store i32 %field, i32 addrspace(1)* %dst
; Extra volatile store that gives the shl a second user.
78 store volatile i32 %hi, i32 addrspace(1)* undef
; Signed bitfield-extract pattern on per-lane values loaded from memory:
;   (src << (32 - width)) >> (32 - width), with an arithmetic right shift.
; %shl has a single use here, so the checks below expect the shift pair to be
; selected as one v_bfe_i32 with offset 0 and the loaded width as the size.
82 ; GCN-LABEL: {{^}}v_sbfe_sub_i32:
83 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
84 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
85 ; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
86 define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
87 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Index all three buffers by the work-item id so each lane uses its own element.
88 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
89 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
90 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Volatile loads keep the two reads as separate load instructions.
91 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer. Reading %in0.gep again left
; %in1.gep unused and fed the same address to both the source and the width.
92 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount shared by the shl and the ashr.
93 %sub = sub i32 32, %width
94 %shl = shl i32 %src, %sub
95 %bfe = ashr i32 %shl, %sub
96 store i32 %bfe, i32 addrspace(1)* %out.gep
; Same signed shl/ashr pattern as the single-use vector test, but %shl is
; also written out by a second (volatile) store. The checks below therefore
; expect the subtract and both shifts to remain as separate instructions:
; v_lshl/v_ashr on the run without -mcpu, and the operand-reversed
; v_lshlrev/v_ashrrev forms on the tonga run.
100 ; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
101 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
102 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
103 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
105 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
106 ; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
108 ; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
109 ; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
113 define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
114 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Index all three buffers by the work-item id so each lane uses its own element.
115 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
116 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
117 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Volatile loads keep the two reads as separate load instructions.
118 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer. Reading %in0.gep again left
; %in1.gep unused and fed the same address to both the source and the width.
119 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount shared by the shl and the ashr.
120 %sub = sub i32 32, %width
121 %shl = shl i32 %src, %sub
122 %bfe = ashr i32 %shl, %sub
123 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl: this is what makes the intermediate value multi-use.
124 store volatile i32 %shl, i32 addrspace(1)* undef
; Signed shl/ashr extract where the source and the width are kernel arguments
; rather than values loaded per lane. The checks below expect two
; s_load_dword, a v_mov copying the width into a vector register, and a
; v_bfe_i32 that takes the scalar source with offset 0.
128 ; GCN-LABEL: {{^}}s_sbfe_sub_i32:
129 ; GCN: s_load_dword [[SRC:s[0-9]+]]
130 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
131 ; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
132 ; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
133 define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
134 %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the destination depends on the work-item id.
135 %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
; 32 - width is used as the amount for both shifts.
136 %shamt = sub i32 32, %width
137 %hi = shl i32 %src, %shamt
138 %field = ashr i32 %hi, %shamt
139 store i32 %field, i32 addrspace(1)* %dst
; Kernel-argument variant of the signed pattern in which the shl result is
; stored a second time. The checks below expect no bitfield-extract: an
; s_sub_i32 immediately followed by s_lshl_b32 and s_ashr_i32.
143 ; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
144 ; GCN: s_load_dword [[SRC:s[0-9]+]]
145 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
146 ; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
147 ; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
148 ; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
149 define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
150 %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the destination depends on the work-item id.
151 %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
; 32 - width is used as the amount for both shifts.
152 %shamt = sub i32 32, %width
153 %hi = shl i32 %src, %shamt
154 %field = ashr i32 %hi, %shamt
155 store i32 %field, i32 addrspace(1)* %dst
; Extra volatile store that gives the shl a second user.
156 store volatile i32 %hi, i32 addrspace(1)* undef
; Intrinsic declaration: takes no arguments and returns an i32, which every
; kernel visible in this file uses as the element index in its getelementptr.
160 declare i32 @llvm.amdgcn.workitem.id.x() #0
; Attribute group #0 is attached to the intrinsic declaration above.
162 attributes #0 = { nounwind readnone }
; Attribute group #1 is attached to each kernel definition visible in this file.
163 attributes #1 = { nounwind }