1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
4 ; GCN-LABEL: {{^}}v_ubfe_sub_i32:
5 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
6 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
7 ; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
; Kernel under test: unsigned bitfield extract written as a shift pair, with
; both operands loaded from memory.
;   %out - destination buffer, indexed by the value of workitem.id.x
;   %in0 - buffer supplying the value to extract from (%src)
;   %in1 - buffer supplying the field width (%width)
; Computes (%src << (32 - %width)) >> (32 - %width) using a logical right
; shift. The check lines above expect the shl/lshr pair to be selected as one
; v_bfe_u32 with offset 0 and the loaded width as the width operand.
8 define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
9 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
10 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
11 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
12 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Both loads are volatile, so two separate load instructions are emitted in
; this order: first the source value, then the width.
13 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer (%in1), not from %in0.
14 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount that discards every bit above the low %width bits.
15 %sub = sub i32 32, %width
16 %shl = shl i32 %src, %sub
; Logical shift back down zero-fills the upper bits (unsigned extract).
17 %bfe = lshr i32 %shl, %sub
18 store i32 %bfe, i32 addrspace(1)* %out.gep
22 ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
23 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
24 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
25 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
27 ; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
28 ; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; Kernel under test: the shl/lshr-by-(32 - width) pair on values loaded from
; memory, where the intermediate %shl has a second use (it is also stored
; with a volatile store to an undef address).
; The check lines above expect the pair to stay as separate instructions:
; a v_sub producing the shift amount, immediately followed by v_lshlrev and
; then v_lshrrev.
32 define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
33 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
34 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
35 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
36 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
37 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; NOTE(review): %width is loaded from %in0.gep, the same address as %src, and
; %in1.gep is computed but never used. This looks like a copy/paste slip;
; confirm intent before changing it, because the NEXT-style checks above
; depend on the exact instruction schedule.
38 %width = load volatile i32, i32 addrspace(1)* %in0.gep
39 %sub = sub i32 32, %width
40 %shl = shl i32 %src, %sub
41 %bfe = lshr i32 %shl, %sub
42 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl; volatile so the store cannot be dropped.
43 store volatile i32 %shl, i32 addrspace(1)* undef
47 ; GCN-LABEL: {{^}}s_ubfe_sub_i32:
48 ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
49 ; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
50 ; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
; Kernel under test: unsigned bitfield extract written as a shift pair, with
; %src and %width passed directly as i32 kernel arguments instead of being
; loaded per work-item.
; The check lines above expect both arguments to arrive in one
; s_load_dwordx2, the width to be copied into a VGPR with v_mov_b32, and the
; shl/lshr pair to be selected as a v_bfe_u32 with offset 0.
51 define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
52 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the output address depends on the work-item id.
53 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Shift amount that discards every bit above the low %width bits.
54 %sub = sub i32 32, %width
55 %shl = shl i32 %src, %sub
; Logical shift back down zero-fills the upper bits (unsigned extract).
56 %bfe = lshr i32 %shl, %sub
57 store i32 %bfe, i32 addrspace(1)* %out.gep
61 ; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
62 ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
63 ; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
64 ; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
65 ; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
; Kernel under test: the shl/lshr-by-(32 - width) pair on i32 kernel
; arguments, where the intermediate %shl has a second use (it is also stored
; with a volatile store to an undef address).
; The check lines above expect scalar instructions and no bitfield extract:
; s_sub_i32 for the shift amount, then s_lshl_b32, then s_lshr_b32.
66 define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
67 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
68 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
69 %sub = sub i32 32, %width
70 %shl = shl i32 %src, %sub
71 %bfe = lshr i32 %shl, %sub
72 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl; volatile so the store cannot be dropped.
73 store volatile i32 %shl, i32 addrspace(1)* undef
77 ; GCN-LABEL: {{^}}v_sbfe_sub_i32:
78 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
79 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
80 ; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
; Kernel under test: signed bitfield extract written as a shift pair, with
; both operands loaded from memory.
;   %out - destination buffer, indexed by the value of workitem.id.x
;   %in0 - buffer supplying the value to extract from (%src)
;   %in1 - buffer supplying the field width (%width)
; Computes (%src << (32 - %width)) >> (32 - %width) using an arithmetic right
; shift. The check lines above expect the shl/ashr pair to be selected as one
; v_bfe_i32 with offset 0 and the loaded width as the width operand.
81 define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
82 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
83 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
84 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
85 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Both loads are volatile, so two separate load instructions are emitted in
; this order: first the source value, then the width.
86 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; The width is read from the second input buffer (%in1), not from %in0.
87 %width = load volatile i32, i32 addrspace(1)* %in1.gep
; Shift amount that discards every bit above the low %width bits.
88 %sub = sub i32 32, %width
89 %shl = shl i32 %src, %sub
; Arithmetic shift back down replicates the field's top bit (signed extract).
90 %bfe = ashr i32 %shl, %sub
91 store i32 %bfe, i32 addrspace(1)* %out.gep
95 ; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
96 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
97 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
98 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
100 ; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
101 ; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; Kernel under test: the shl/ashr-by-(32 - width) pair on values loaded from
; memory, where the intermediate %shl has a second use (it is also stored
; with a volatile store to an undef address).
; The check lines above expect the pair to stay as separate instructions:
; a v_sub producing the shift amount, immediately followed by v_lshlrev and
; then v_ashrrev.
105 define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
106 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
107 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
108 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
109 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
110 %src = load volatile i32, i32 addrspace(1)* %in0.gep
; NOTE(review): %width is loaded from %in0.gep, the same address as %src, and
; %in1.gep is computed but never used. This looks like a copy/paste slip;
; confirm intent before changing it, because the NEXT-style checks above
; depend on the exact instruction schedule.
111 %width = load volatile i32, i32 addrspace(1)* %in0.gep
112 %sub = sub i32 32, %width
113 %shl = shl i32 %src, %sub
114 %bfe = ashr i32 %shl, %sub
115 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl; volatile so the store cannot be dropped.
116 store volatile i32 %shl, i32 addrspace(1)* undef
120 ; GCN-LABEL: {{^}}s_sbfe_sub_i32:
121 ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
122 ; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
123 ; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
; Kernel under test: signed bitfield extract written as a shift pair, with
; %src and %width passed directly as i32 kernel arguments instead of being
; loaded per work-item.
; The check lines above expect both arguments to arrive in one
; s_load_dwordx2, the width to be copied into a VGPR with v_mov_b32, and the
; shl/ashr pair to be selected as a v_bfe_i32 with offset 0.
124 define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
125 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the output address depends on the work-item id.
126 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
; Shift amount that discards every bit above the low %width bits.
127 %sub = sub i32 32, %width
128 %shl = shl i32 %src, %sub
; Arithmetic shift back down replicates the field's top bit (signed extract).
129 %bfe = ashr i32 %shl, %sub
130 store i32 %bfe, i32 addrspace(1)* %out.gep
134 ; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
135 ; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
136 ; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
137 ; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
138 ; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
; Kernel under test: the shl/ashr-by-(32 - width) pair on i32 kernel
; arguments, where the intermediate %shl has a second use (it is also stored
; with a volatile store to an undef address).
; The check lines above expect scalar instructions and no bitfield extract:
; s_sub_i32 for the shift amount, then s_lshl_b32, then s_ashr_i32.
139 define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
140 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
141 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
142 %sub = sub i32 32, %width
143 %shl = shl i32 %src, %sub
144 %bfe = ashr i32 %shl, %sub
145 store i32 %bfe, i32 addrspace(1)* %out.gep
; Second use of %shl; volatile so the store cannot be dropped.
146 store volatile i32 %shl, i32 addrspace(1)* undef
; AMDGPU intrinsic called by every kernel in this file to index its buffers.
; Going by its name it returns the work-item's x index; see the AMDGPU
; backend documentation for the exact definition.
150 declare i32 @llvm.amdgcn.workitem.id.x() #0
; Attribute group #0 is attached to the intrinsic declaration.
152 attributes #0 = { nounwind readnone }
; Attribute group #1 is attached to every kernel definition in this file.
153 attributes #1 = { nounwind }