1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4 ; Test that add/sub with a constant is swapped to sub/add with negated
5 ; constant to minimize code size.
; Vector case, x - 64.
; The checks expect the loaded value to be combined with the literal 64 by a
; v_subrev instruction (64 first, loaded value second); the constant is not
; emitted in negated form.
; NOTE(review): presumably 64 is kept because it is cheaper to encode than
; -64 (see the file header about code size) -- confirm against the ISA docs.
7 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
8 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
9 ; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
10 define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
11 %tid = call i32 @llvm.amdgcn.workitem.id.x()
12 %tid.ext = sext i32 %tid to i64
13 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
14 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
15 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: loaded value minus the constant 64.
16 %result = sub i32 %x, 64
17 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, x - 64 and y - 64 in the same kernel (constant used twice).
; Two separate loads are matched in order, then two v_subrev instructions with
; the literal 64 are matched with DAG checks, so the two subtracts may appear
; in either order in the output.
21 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
22 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
23 ; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
24 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
25 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
26 define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
27 %tid = call i32 @llvm.amdgcn.workitem.id.x()
28 %tid.ext = sext i32 %tid to i64
29 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
30 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
; Both loads read the same address; they are volatile, so each one must be
; performed and the checks above can bind two distinct registers.
31 %x = load volatile i32, i32 addrspace(1)* %gep
32 %y = load volatile i32, i32 addrspace(1)* %gep
; Operations under test: the same constant 64 subtracted from each value.
33 %result0 = sub i32 %x, 64
34 %result1 = sub i32 %y, 64
; Volatile stores to the same output address keep both results in use.
35 store volatile i32 %result0, i32 addrspace(1)* %gep.out
36 store volatile i32 %result1, i32 addrspace(1)* %gep.out
; Vector case, 64 - x (constant on the left-hand side of the subtract).
; The checks expect a plain v_sub with the literal 64 as the first source
; operand and the loaded value as the second.
40 ; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
41 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
42 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
43 define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
44 %tid = call i32 @llvm.amdgcn.workitem.id.x()
45 %tid.ext = sext i32 %tid to i64
46 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
47 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
48 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: the constant 64 minus the loaded value.
49 %result = sub i32 64, %x
50 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, x - 65.
; Unlike the x - 64 test, the checks here expect a v_add with the literal
; 0xffffffbf, which is -65 as a 32-bit two's complement value.
; NOTE(review): presumably neither 65 nor -65 has a short encoding, so no
; add/sub swap is expected -- confirm against the ISA constant rules.
54 ; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
55 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
56 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
57 define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
58 %tid = call i32 @llvm.amdgcn.workitem.id.x()
59 %tid.ext = sext i32 %tid to i64
60 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
61 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
62 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: loaded value minus the constant 65.
63 %result = sub i32 %x, 65
64 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, 65 - x (constant on the left-hand side of the subtract).
; The checks expect a v_sub whose first source operand is the literal 0x41
; (65 in hexadecimal) and whose second is the loaded value.
68 ; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
69 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
70 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
71 define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
72 %tid = call i32 @llvm.amdgcn.workitem.id.x()
73 %tid.ext = sext i32 %tid to i64
74 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
75 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
76 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: the constant 65 minus the loaded value.
77 %result = sub i32 65, %x
78 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, x - (-16).
; The checks expect this to be emitted as a v_add of the positive literal 16
; to the loaded value.
82 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
83 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
84 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
85 define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
86 %tid = call i32 @llvm.amdgcn.workitem.id.x()
87 %tid.ext = sext i32 %tid to i64
88 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
89 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
90 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: loaded value minus the constant -16.
91 %result = sub i32 %x, -16
92 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, (-16) - x (negative constant on the left-hand side).
; The checks expect a v_sub whose first source operand is printed as the
; decimal literal -16 and whose second is the loaded value.
; NOTE(review): the decimal (rather than hex) spelling presumably means -16
; is encodable as a short constant -- confirm against the ISA docs.
96 ; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
97 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
98 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
99 define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
100 %tid = call i32 @llvm.amdgcn.workitem.id.x()
101 %tid.ext = sext i32 %tid to i64
102 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
103 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
104 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: the constant -16 minus the loaded value.
105 %result = sub i32 -16, %x
106 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, x - (-17).
; The checks expect this to be emitted as a v_add of the positive literal 17
; to the loaded value, mirroring the x - (-16) test with the next constant.
110 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
111 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
112 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
113 define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
114 %tid = call i32 @llvm.amdgcn.workitem.id.x()
115 %tid.ext = sext i32 %tid to i64
116 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
117 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
118 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: loaded value minus the constant -17.
119 %result = sub i32 %x, -17
120 store i32 %result, i32 addrspace(1)* %gep.out
; Vector case, (-17) - x (negative constant on the left-hand side).
; The checks expect a v_sub whose first source operand is the literal
; 0xffffffef, which is -17 as a 32-bit two's complement value.
; NOTE(review): the hex spelling, in contrast to the decimal -16 in the
; neg16 test, presumably means -17 needs a full 32-bit literal -- confirm.
124 ; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
125 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
126 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
127 define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out.
128 %tid = call i32 @llvm.amdgcn.workitem.id.x()
129 %tid.ext = sext i32 %tid to i64
130 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
131 %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
132 %x = load i32, i32 addrspace(1)* %gep
; Operation under test: the constant -17 minus the loaded value.
133 %result = sub i32 -17, %x
134 store i32 %result, i32 addrspace(1)* %gep.out
; Scalar case, x - 64, where x is a kernel argument instead of a loaded
; per-work-item value.
; The checks expect the argument to arrive via s_load_dword and the subtract
; to be emitted as s_sub_i32 with the literal 64 as the last operand.
138 ; GCN-LABEL: {{^}}s_test_i32_x_sub_64:
139 ; GCN: s_load_dword [[X:s[0-9]+]]
140 ; GCN: s_sub_i32 s{{[0-9]+}}, [[X]], 64
141 define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
; Operation under test: kernel argument minus the constant 64.
142 %result = sub i32 %x, 64
; The result is consumed by a side-effecting inline asm through an "s"
; constraint operand, which keeps the subtract from being removed.
; NOTE(review): the "s" constraint presumably requests a scalar register --
; confirm against the AMDGPU inline-asm constraint documentation.
143 call void asm sideeffect "; use $0", "s"(i32 %result)
; 16-bit vector case, x - 64.
; Only the label is checked for every run; the instruction checks below use
; the VI prefix alone, i.e. they apply to the -mcpu=tonga run line. They
; expect a ushort load followed by v_subrev_u16_e32 with the literal 64 and
; no vcc operand.
147 ; GCN-LABEL: {{^}}v_test_i16_x_sub_64:
148 ; VI: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
149 ; VI: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
150 define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
; The work-item id, sign-extended to i64, indexes both %in and %out
; (i16 element type, so the index is in units of 16-bit elements).
151 %tid = call i32 @llvm.amdgcn.workitem.id.x()
152 %tid.ext = sext i32 %tid to i64
153 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
154 %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
155 %x = load i16, i16 addrspace(1)* %gep
; Operation under test: 16-bit loaded value minus the constant 64.
156 %result = sub i16 %x, 64
157 store i16 %result, i16 addrspace(1)* %gep.out
; 16-bit vector case, x - 64 and y - 64 in the same kernel (constant used
; twice).
; Both runs are expected to emit two ushort loads. The subtract checks then
; differ per prefix: under the VI prefix a v_subrev_u16_e32 without a vcc
; operand is expected, while under the SI prefix the 16-bit subtract is
; expected as a 32-bit v_subrev_i32_e32 with a vcc operand. All four
; subtract checks are DAG checks, so the two subtracts may appear in either
; order.
161 ; GCN-LABEL: {{^}}v_test_i16_x_sub_64_multi_use:
162 ; GCN: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
163 ; GCN: {{buffer|flat}}_load_ushort [[Y:v[0-9]+]]
164 ; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
165 ; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[Y]]
167 ; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
168 ; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
169 define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
170 %tid = call i32 @llvm.amdgcn.workitem.id.x()
171 %tid.ext = sext i32 %tid to i64
172 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
173 %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
; Both loads read the same address; they are volatile, so each one must be
; performed and the checks above can bind two distinct registers.
174 %x = load volatile i16, i16 addrspace(1)* %gep
175 %y = load volatile i16, i16 addrspace(1)* %gep
; Operations under test: the same constant 64 subtracted from each value.
176 %result0 = sub i16 %x, 64
177 %result1 = sub i16 %y, 64
; Volatile stores to the same output address keep both results in use.
178 store volatile i16 %result0, i16 addrspace(1)* %gep.out
179 store volatile i16 %result1, i16 addrspace(1)* %gep.out
; Intrinsic called by the vector tests above; its result is used as the
; per-kernel element index into the input and output buffers.
183 declare i32 @llvm.amdgcn.workitem.id.x() #1
; Attribute group #0 is attached to every kernel definition in this file.
185 attributes #0 = { nounwind }
; Attribute group #1 is attached to the intrinsic declaration above.
186 attributes #1 = { nounwind readnone }