1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.
; Case: the fmul result has two users. Its negation feeds the fcmp, and the
; un-negated product is written out by the final volatile store.
; Expected output: a plain v_mul_f32 of the two loaded floats, a v_cmp_eq_f32
; of that product against -4.0 (no separate negate instruction is checked
; for), and a store of the same product register.
; The third load is captured as C but is not referenced by any later check.
5 ; GCN-LABEL: {{^}}multi_use_fneg_src:
6 ; GCN: buffer_load_dword [[A:v[0-9]+]]
7 ; GCN: buffer_load_dword [[B:v[0-9]+]]
8 ; GCN: buffer_load_dword [[C:v[0-9]+]]
10 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
11 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
12 ; GCN: buffer_store_dword [[MUL]]
13 define amdgpu_kernel void @multi_use_fneg_src() #0 {
14 %a = load volatile float, ptr addrspace(1) undef
15 %b = load volatile float, ptr addrspace(1) undef
16 %x = load volatile i32, ptr addrspace(1) undef
17 %y = load volatile i32, ptr addrspace(1) undef
19 %mul = fmul float %a, %b
; Negation written as (-0.0 - %mul); only the compare consumes it.
20 %neg.mul = fsub float -0.0, %mul
21 %cmp = fcmp oeq float %neg.mul, 4.0
22 %select = select i1 %cmp, i32 %x, i32 %y
23 store volatile i32 %select, ptr addrspace(1) undef
; Second use of the fmul source: the un-negated product itself is stored.
24 store volatile float %mul, ptr addrspace(1) undef
; Case: the fmul result is used through its negation by the fcmp and by a
; second fmul that multiplies the product by its own negation.
; Expected output: one v_mul_f32 producing the product, a v_cmp_eq_f32 of that
; product against -4.0, and a second v_mul_f32_e64 whose last operand is the
; negated product (-[[MUL]]) rather than a separately negated register.
; The compare operates on the product, so its operand is matched against
; [[MUL]] and not against the register of the first loaded value; this keeps
; the check valid even if the multiply result is not allocated to the same
; register as its first input.
; The third load is captured as C but is not referenced by any later check.
28 ; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
29 ; GCN: buffer_load_dword [[A:v[0-9]+]]
30 ; GCN: buffer_load_dword [[B:v[0-9]+]]
31 ; GCN: buffer_load_dword [[C:v[0-9]+]]
33 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
34 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
35 ; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
36 define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
37 %a = load volatile float, ptr addrspace(1) undef
38 %b = load volatile float, ptr addrspace(1) undef
39 %x = load volatile i32, ptr addrspace(1) undef
40 %y = load volatile i32, ptr addrspace(1) undef
42 %mul = fmul float %a, %b
; Negation written as (-0.0 - %mul); used by both %use1 and %cmp below.
43 %neg.mul = fsub float -0.0, %mul
44 %use1 = fmul float %mul, %neg.mul
45 %cmp = fcmp oeq float %neg.mul, 4.0
46 %select = select i1 %cmp, i32 %x, i32 %y
48 store volatile i32 %select, ptr addrspace(1) undef
49 store volatile float %use1, ptr addrspace(1) undef
; Case: the negated product itself has two users. It feeds the fcmp and it
; is also the value written by the final volatile store.
; Expected output: the negation appears on the second multiply operand
; (A * -B), the compare immediately follows the multiply and uses +4.0
; (the constant is not negated here), and the multiply result is stored.
; The third load is captured as C but is not referenced by any later check.
53 ; GCN-LABEL: {{^}}multi_use_fneg:
54 ; GCN: buffer_load_dword [[A:v[0-9]+]]
55 ; GCN: buffer_load_dword [[B:v[0-9]+]]
56 ; GCN: buffer_load_dword [[C:v[0-9]+]]
58 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
59 ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
61 ; GCN: buffer_store_dword [[MUL]]
62 define amdgpu_kernel void @multi_use_fneg() #0 {
63 %a = load volatile float, ptr addrspace(1) undef
64 %b = load volatile float, ptr addrspace(1) undef
65 %x = load volatile i32, ptr addrspace(1) undef
66 %y = load volatile i32, ptr addrspace(1) undef
68 %mul = fmul float %a, %b
; Negation written as (-0.0 - %mul); used by %cmp and by the last store.
69 %neg.mul = fsub float -0.0, %mul
70 %cmp = fcmp oeq float %neg.mul, 4.0
71 %select = select i1 %cmp, i32 %x, i32 %y
72 store volatile i32 %select, ptr addrspace(1) undef
; Second use of the negated value: it is stored directly.
73 store volatile float %neg.mul, ptr addrspace(1) undef
; Case: the negated product is used by the fcmp and by a second fmul that
; multiplies it with the un-negated product.
; Expected output: one v_mul_f32 producing the product (MUL0), a v_cmp_eq_f32
; of MUL0 against -4.0, a second v_mul_f32_e64 whose first operand is the
; negated product (-[[MUL0]]), and a store of the second multiply's result.
; Only the first two loads are captured by the checks.
77 ; GCN-LABEL: {{^}}multi_foldable_use_fneg:
78 ; GCN: buffer_load_dword [[A:v[0-9]+]]
79 ; GCN: buffer_load_dword [[B:v[0-9]+]]
81 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
82 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
83 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
84 ; GCN: buffer_store_dword [[MUL1]]
85 define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
86 %a = load volatile float, ptr addrspace(1) undef
87 %b = load volatile float, ptr addrspace(1) undef
88 %x = load volatile i32, ptr addrspace(1) undef
89 %y = load volatile i32, ptr addrspace(1) undef
; %z is loaded (volatile) but has no other use in this function.
90 %z = load volatile i32, ptr addrspace(1) undef
92 %mul = fmul float %a, %b
; Negation written as (-0.0 - %mul); used by %cmp and %use1 below.
93 %neg.mul = fsub float -0.0, %mul
94 %cmp = fcmp oeq float %neg.mul, 4.0
95 %select = select i1 %cmp, i32 %x, i32 %y
96 %use1 = fmul float %neg.mul, %mul
97 store volatile i32 %select, ptr addrspace(1) undef
98 store volatile float %use1, ptr addrspace(1) undef
; Predicate oeq with a positive constant: (-a == 4.0).
; Expected output: a single v_cmp_eq_f32 with -4.0 as the first source and a
; vector register as the second; only the compare instruction is checked.
102 ; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
103 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
104 define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
105 %a = load volatile float, ptr addrspace(1) undef
106 %x = load volatile i32, ptr addrspace(1) undef
107 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
108 %neg.a = fsub float -0.0, %a
109 %cmp = fcmp oeq float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
110 %select = select i1 %cmp, i32 %x, i32 %y
111 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ogt with a positive constant: (-a > 4.0), which is equivalent to
; (-4.0 > a).
; Expected output: v_cmp_gt_f32 with -4.0 as the first source and a vector
; register as the second; only the compare instruction is checked.
115 ; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
116 ; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
117 define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
118 %a = load volatile float, ptr addrspace(1) undef
119 %x = load volatile i32, ptr addrspace(1) undef
120 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
121 %neg.a = fsub float -0.0, %a
122 %cmp = fcmp ogt float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
123 %select = select i1 %cmp, i32 %x, i32 %y
124 store volatile i32 %select, ptr addrspace(1) undef
; Predicate oge with a positive constant: (-a >= 4.0), which is equivalent to
; (-4.0 >= a).
; Expected output: v_cmp_ge_f32 with -4.0 as the first source and a vector
; register as the second; only the compare instruction is checked.
128 ; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
129 ; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
130 define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
131 %a = load volatile float, ptr addrspace(1) undef
132 %x = load volatile i32, ptr addrspace(1) undef
133 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
134 %neg.a = fsub float -0.0, %a
135 %cmp = fcmp oge float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
136 %select = select i1 %cmp, i32 %x, i32 %y
137 store volatile i32 %select, ptr addrspace(1) undef
; Predicate olt with a positive constant: (-a < 4.0), which is equivalent to
; (-4.0 < a).
; Expected output: v_cmp_lt_f32 with -4.0 as the first source and a vector
; register as the second; only the compare instruction is checked.
141 ; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
142 ; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
143 define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
144 %a = load volatile float, ptr addrspace(1) undef
145 %x = load volatile i32, ptr addrspace(1) undef
146 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
147 %neg.a = fsub float -0.0, %a
148 %cmp = fcmp olt float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
149 %select = select i1 %cmp, i32 %x, i32 %y
150 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ole with a positive constant: (-a <= 4.0), which is equivalent to
; (-4.0 <= a).
; Expected output: v_cmp_le_f32 with -4.0 as the first source and a vector
; register as the second; only the compare instruction is checked.
154 ; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
155 ; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
156 define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
157 %a = load volatile float, ptr addrspace(1) undef
158 %x = load volatile i32, ptr addrspace(1) undef
159 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
160 %neg.a = fsub float -0.0, %a
161 %cmp = fcmp ole float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
162 %select = select i1 %cmp, i32 %x, i32 %y
163 store volatile i32 %select, ptr addrspace(1) undef
; Predicate one (ordered, not equal) with a positive constant: (-a != 4.0).
; Expected output: the v_cmp_lg_f32 mnemonic with -4.0 as the first source and
; a vector register as the second; only the compare instruction is checked.
167 ; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
168 ; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
169 define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
170 %a = load volatile float, ptr addrspace(1) undef
171 %x = load volatile i32, ptr addrspace(1) undef
172 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
173 %neg.a = fsub float -0.0, %a
174 %cmp = fcmp one float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
175 %select = select i1 %cmp, i32 %x, i32 %y
176 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ueq (unordered or equal) with a positive constant.
; Expected output: the v_cmp_nlg_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
180 ; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
181 ; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
182 define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
183 %a = load volatile float, ptr addrspace(1) undef
184 %x = load volatile i32, ptr addrspace(1) undef
185 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
186 %neg.a = fsub float -0.0, %a
187 %cmp = fcmp ueq float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
188 %select = select i1 %cmp, i32 %x, i32 %y
189 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ugt (unordered or greater than) with a positive constant.
; Expected output: the v_cmp_nle_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
193 ; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
194 ; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
195 define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
196 %a = load volatile float, ptr addrspace(1) undef
197 %x = load volatile i32, ptr addrspace(1) undef
198 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
199 %neg.a = fsub float -0.0, %a
200 %cmp = fcmp ugt float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
201 %select = select i1 %cmp, i32 %x, i32 %y
202 store volatile i32 %select, ptr addrspace(1) undef
; Predicate uge (unordered or greater than or equal) with a positive constant.
; Expected output: the v_cmp_nlt_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
206 ; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
207 ; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
208 define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
209 %a = load volatile float, ptr addrspace(1) undef
210 %x = load volatile i32, ptr addrspace(1) undef
211 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
212 %neg.a = fsub float -0.0, %a
213 %cmp = fcmp uge float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
214 %select = select i1 %cmp, i32 %x, i32 %y
215 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ult (unordered or less than) with a positive constant.
; Expected output: the v_cmp_nge_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
219 ; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
220 ; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
221 define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
222 %a = load volatile float, ptr addrspace(1) undef
223 %x = load volatile i32, ptr addrspace(1) undef
224 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
225 %neg.a = fsub float -0.0, %a
226 %cmp = fcmp ult float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
227 %select = select i1 %cmp, i32 %x, i32 %y
228 store volatile i32 %select, ptr addrspace(1) undef
; Predicate ule (unordered or less than or equal) with a positive constant.
; Expected output: the v_cmp_ngt_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
232 ; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
233 ; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
234 define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
235 %a = load volatile float, ptr addrspace(1) undef
236 %x = load volatile i32, ptr addrspace(1) undef
237 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
238 %neg.a = fsub float -0.0, %a
239 %cmp = fcmp ule float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
240 %select = select i1 %cmp, i32 %x, i32 %y
241 store volatile i32 %select, ptr addrspace(1) undef
; Predicate une (unordered or not equal) with a positive constant.
; Expected output: the v_cmp_neq_f32 mnemonic with -4.0 as the first source
; and a vector register as the second; only the compare instruction is
; checked.
245 ; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
246 ; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
247 define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
248 %a = load volatile float, ptr addrspace(1) undef
249 %x = load volatile i32, ptr addrspace(1) undef
250 %y = load volatile i32, ptr addrspace(1) undef
; Negation written as (-0.0 - %a).
251 %neg.a = fsub float -0.0, %a
252 %cmp = fcmp une float %neg.a, 4.0
; The compare result picks between %x and %y, and the pick is stored volatile.
253 %select = select i1 %cmp, i32 %x, i32 %y
254 store volatile i32 %select, ptr addrspace(1) undef
; Attribute group referenced as #0 by the kernel definitions above; it
; contains only nounwind.
258 attributes #0 = { nounwind }