1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; Select between fabs(x) and fabs(y), then add z. The checks expect the
; select to operate on the raw loaded values and the |abs| source modifier to
; appear once, on the add.
; The final store is volatile, like the stores in every other test in this
; file, so the computation under test always has an observable use.
4 ; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
5 ; GCN: buffer_load_dword [[X:v[0-9]+]]
6 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
7 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
9 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
10 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
11 define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
12 %x = load volatile float, float addrspace(1)* undef
13 %y = load volatile float, float addrspace(1)* undef
14 %z = load volatile float, float addrspace(1)* undef
15 %cmp = icmp eq i32 %c, 0
16 %fabs.x = call float @llvm.fabs.f32(float %x)
17 %fabs.y = call float @llvm.fabs.f32(float %y)
18 %select = select i1 %cmp, float %fabs.x, float %fabs.y
19 %add = fadd float %select, %z
20 store volatile float %add, float addrspace(1)* undef
; Same as the fabs/fabs select, but fabs(x) has a second use in another fadd
; with w. The checks expect the select on the raw values, |SELECT| on the
; first add, and |X| as a source modifier on the second add.
24 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
25 ; GCN: buffer_load_dword [[X:v[0-9]+]]
26 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
27 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
28 ; GCN: buffer_load_dword [[W:v[0-9]+]]
30 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
31 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
32 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
33 define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
34 %x = load volatile float, float addrspace(1)* undef
35 %y = load volatile float, float addrspace(1)* undef
36 %z = load volatile float, float addrspace(1)* undef
37 %w = load volatile float, float addrspace(1)* undef
38 %cmp = icmp eq i32 %c, 0
39 %fabs.x = call float @llvm.fabs.f32(float %x)
40 %fabs.y = call float @llvm.fabs.f32(float %y)
41 %select = select i1 %cmp, float %fabs.x, float %fabs.y
42 %add0 = fadd float %select, %z
43 %add1 = fadd float %fabs.x, %w
44 store volatile float %add0, float addrspace(1)* undef
45 store volatile float %add1, float addrspace(1)* undef
; fabs(x) feeds the select and is also stored directly. The checks expect the
; select on the raw values with |SELECT| on the add, plus a separate v_and_b32
; with 0x7fffffff producing the abs value that is stored.
49 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
50 ; GCN: buffer_load_dword [[X:v[0-9]+]]
51 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
52 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
54 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
55 ; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
56 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
58 ; GCN: buffer_store_dword [[ADD]]
59 ; GCN: buffer_store_dword [[X_ABS]]
60 define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
61 %x = load volatile float, float addrspace(1)* undef
62 %y = load volatile float, float addrspace(1)* undef
63 %z = load volatile float, float addrspace(1)* undef
64 %cmp = icmp eq i32 %c, 0
65 %fabs.x = call float @llvm.fabs.f32(float %x)
66 %fabs.y = call float @llvm.fabs.f32(float %y)
67 %select = select i1 %cmp, float %fabs.x, float %fabs.y
68 %add0 = fadd float %select, %z
69 store volatile float %add0, float addrspace(1)* undef
70 store volatile float %fabs.x, float addrspace(1)* undef
; Mirror of the multi-use lhs test: here fabs(y), the false operand of the
; select, has the extra use in a second fadd with w. The checks expect |Y| as
; a source modifier on that second add.
74 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
75 ; GCN: buffer_load_dword [[X:v[0-9]+]]
76 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
77 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
78 ; GCN: buffer_load_dword [[W:v[0-9]+]]
80 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
81 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
82 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
83 define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
84 %x = load volatile float, float addrspace(1)* undef
85 %y = load volatile float, float addrspace(1)* undef
86 %z = load volatile float, float addrspace(1)* undef
87 %w = load volatile float, float addrspace(1)* undef
88 %cmp = icmp eq i32 %c, 0
89 %fabs.x = call float @llvm.fabs.f32(float %x)
90 %fabs.y = call float @llvm.fabs.f32(float %y)
91 %select = select i1 %cmp, float %fabs.x, float %fabs.y
92 %add0 = fadd float %select, %z
93 %add1 = fadd float %fabs.y, %w
94 store volatile float %add0, float addrspace(1)* undef
95 store volatile float %add1, float addrspace(1)* undef
; Only the true operand of the select is a fabs; the false operand is the
; plain value y. The checks expect the |abs| modifier to stay on the
; v_cndmask_b32_e64 source and the add to be a plain e32 add.
99 ; GCN-LABEL: {{^}}add_select_fabs_var_f32:
100 ; GCN: buffer_load_dword [[X:v[0-9]+]]
101 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
102 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
104 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
105 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
106 define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
107 %x = load volatile float, float addrspace(1)* undef
108 %y = load volatile float, float addrspace(1)* undef
109 %z = load volatile float, float addrspace(1)* undef
110 %cmp = icmp eq i32 %c, 0
111 %fabs.x = call float @llvm.fabs.f32(float %x)
112 %select = select i1 %cmp, float %fabs.x, float %y
113 %add = fadd float %select, %z
114 store volatile float %add, float addrspace(1)* undef
; Select between fabs(x) and the constant -1.0. The checks expect -1.0 as an
; operand of the select with |X| kept as a modifier on the select, and a plain
; e32 add afterwards.
118 ; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
119 ; GCN: buffer_load_dword [[X:v[0-9]+]]
120 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
122 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
123 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
124 define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
125 %x = load volatile float, float addrspace(1)* undef
126 %y = load volatile float, float addrspace(1)* undef
127 %cmp = icmp eq i32 %c, 0
128 %fabs = call float @llvm.fabs.f32(float %x)
129 %select = select i1 %cmp, float %fabs, float -1.0
130 %add = fadd float %select, %y
131 store volatile float %add, float addrspace(1)* undef
; fabs applied to a select between the constants -2.0 and -1.0. The checks
; record the current output, where the select keeps the negative constants and
; the |abs| modifier is still present on the add (see the FIXME).
135 ; FIXME: fabs should fold away
136 ; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
137 ; GCN: buffer_load_dword [[X:v[0-9]+]]
139 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
140 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
141 define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
142 %x = load volatile float, float addrspace(1)* undef
143 %cmp = icmp eq i32 %c, 0
144 %select = select i1 %cmp, float -2.0, float -1.0
145 %fabs = call float @llvm.fabs.f32(float %select)
146 %add = fadd float %fabs, %x
147 store volatile float %add, float addrspace(1)* undef
; Baseline with no fabs or fneg: select between the constants 2.0 and 1.0,
; then add x. The checks expect both constants as select operands and a plain
; e32 add.
151 ; GCN-LABEL: {{^}}add_select_posk_posk_f32:
152 ; GCN: buffer_load_dword [[X:v[0-9]+]]
154 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
155 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
156 define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
157 %x = load volatile float, float addrspace(1)* undef
158 %cmp = icmp eq i32 %c, 0
159 %select = select i1 %cmp, float 2.0, float 1.0
160 %add = fadd float %select, %x
161 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the fabs/negk test: the constant -1.0 is the true
; operand and fabs(x) the false operand. The checks expect a v_cmp_ne compare
; feeding a select of -1.0 and |X|, followed by a plain e32 add.
165 ; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
166 ; GCN: buffer_load_dword [[X:v[0-9]+]]
167 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
169 ; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
170 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
171 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
172 define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
173 %x = load volatile float, float addrspace(1)* undef
174 %y = load volatile float, float addrspace(1)* undef
175 %cmp = icmp eq i32 %c, 0
176 %fabs = call float @llvm.fabs.f32(float %x)
177 %select = select i1 %cmp, float -1.0, float %fabs
178 %add = fadd float %select, %y
179 store volatile float %add, float addrspace(1)* undef
; Like the negk/fabs test, but with the constant -1024.0. The checks expect it
; to be moved into a register as 0xc4800000 and then selected against |X|.
183 ; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
184 ; GCN: buffer_load_dword [[X:v[0-9]+]]
185 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
186 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
188 ; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
189 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
190 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
191 define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
192 %x = load volatile float, float addrspace(1)* undef
193 %y = load volatile float, float addrspace(1)* undef
194 %cmp = icmp eq i32 %c, 0
195 %fabs = call float @llvm.fabs.f32(float %x)
196 %select = select i1 %cmp, float -1024.0, float %fabs
197 %add = fadd float %select, %y
198 store volatile float %add, float addrspace(1)* undef
; Select between fabs(x) and the constant 1.0. The checks expect the select on
; the raw X and 1.0, with the |abs| modifier moved onto the add.
202 ; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
203 ; GCN: buffer_load_dword [[X:v[0-9]+]]
204 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
206 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
207 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
208 define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
209 %x = load volatile float, float addrspace(1)* undef
210 %y = load volatile float, float addrspace(1)* undef
212 %cmp = icmp eq i32 %c, 0
213 %fabs = call float @llvm.fabs.f32(float %x)
214 %select = select i1 %cmp, float %fabs, float 1.0
215 %add = fadd float %select, %y
216 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the fabs/posk test: 1.0 is the true operand and
; fabs(x) the false operand. The checks expect a v_cmp_ne compare, the select
; on raw X and 1.0, and the |abs| modifier on the add.
220 ; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
221 ; GCN: buffer_load_dword [[X:v[0-9]+]]
222 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
224 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
225 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
226 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
227 define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
228 %x = load volatile float, float addrspace(1)* undef
229 %y = load volatile float, float addrspace(1)* undef
230 %cmp = icmp eq i32 %c, 0
231 %fabs = call float @llvm.fabs.f32(float %x)
232 %select = select i1 %cmp, float 1.0, float %fabs
233 %add = fadd float %select, %y
234 store volatile float %add, float addrspace(1)* undef
; Both select operands are negations, written as fsub -0.0, v. The checks
; expect the select on the raw values and the negation absorbed into the
; arithmetic as a subtract (Z - SELECT).
238 ; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
239 ; GCN: buffer_load_dword [[X:v[0-9]+]]
240 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
241 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
243 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
244 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
245 define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
246 %x = load volatile float, float addrspace(1)* undef
247 %y = load volatile float, float addrspace(1)* undef
248 %z = load volatile float, float addrspace(1)* undef
249 %cmp = icmp eq i32 %c, 0
250 %fneg.x = fsub float -0.0, %x
251 %fneg.y = fsub float -0.0, %y
252 %select = select i1 %cmp, float %fneg.x, float %fneg.y
253 %add = fadd float %select, %z
254 store volatile float %add, float addrspace(1)* undef
; fneg/fneg select where the negated x has a second use in another fadd with
; w. The checks expect both adds to become subtracts: Z - SELECT and W - X.
258 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
259 ; GCN: buffer_load_dword [[X:v[0-9]+]]
260 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
261 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
262 ; GCN: buffer_load_dword [[W:v[0-9]+]]
264 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
265 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
266 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
267 define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
268 %x = load volatile float, float addrspace(1)* undef
269 %y = load volatile float, float addrspace(1)* undef
270 %z = load volatile float, float addrspace(1)* undef
271 %w = load volatile float, float addrspace(1)* undef
272 %cmp = icmp eq i32 %c, 0
273 %fneg.x = fsub float -0.0, %x
274 %fneg.y = fsub float -0.0, %y
275 %select = select i1 %cmp, float %fneg.x, float %fneg.y
276 %add0 = fadd float %select, %z
277 %add1 = fadd float %fneg.x, %w
278 store volatile float %add0, float addrspace(1)* undef
279 store volatile float %add1, float addrspace(1)* undef
; The negated x feeds the select and is also stored directly. The checks
; expect the select on raw values with a subtract, plus a separate v_xor_b32
; with 0x80000000 producing the negated value that is stored.
283 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
284 ; GCN: buffer_load_dword [[X:v[0-9]+]]
285 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
286 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
288 ; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
289 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
290 ; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
292 ; GCN: buffer_store_dword [[ADD]]
293 ; GCN: buffer_store_dword [[NEG_X]]
294 define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
295 %x = load volatile float, float addrspace(1)* undef
296 %y = load volatile float, float addrspace(1)* undef
297 %z = load volatile float, float addrspace(1)* undef
298 %cmp = icmp eq i32 %c, 0
299 %fneg.x = fsub float -0.0, %x
300 %fneg.y = fsub float -0.0, %y
301 %select = select i1 %cmp, float %fneg.x, float %fneg.y
302 %add0 = fadd float %select, %z
303 store volatile float %add0, float addrspace(1)* undef
304 store volatile float %fneg.x, float addrspace(1)* undef
; Mirror of the multi-use lhs fneg test: here the negated y, the false operand
; of the select, has the extra use. The checks expect subtracts Z - SELECT and
; W - Y.
308 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
309 ; GCN: buffer_load_dword [[X:v[0-9]+]]
310 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
311 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
312 ; GCN: buffer_load_dword [[W:v[0-9]+]]
314 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
315 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
316 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
317 define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
318 %x = load volatile float, float addrspace(1)* undef
319 %y = load volatile float, float addrspace(1)* undef
320 %z = load volatile float, float addrspace(1)* undef
321 %w = load volatile float, float addrspace(1)* undef
322 %cmp = icmp eq i32 %c, 0
323 %fneg.x = fsub float -0.0, %x
324 %fneg.y = fsub float -0.0, %y
325 %select = select i1 %cmp, float %fneg.x, float %fneg.y
326 %add0 = fadd float %select, %z
327 %add1 = fadd float %fneg.y, %w
328 store volatile float %add0, float addrspace(1)* undef
329 store volatile float %add1, float addrspace(1)* undef
; Only the true operand of the select is negated; the false operand is the
; plain value y. The checks expect the neg modifier to stay on the
; v_cndmask_b32_e64 source and the add to remain a plain e32 add.
333 ; GCN-LABEL: {{^}}add_select_fneg_var_f32:
334 ; GCN: buffer_load_dword [[X:v[0-9]+]]
335 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
336 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
338 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
339 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
340 define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
341 %x = load volatile float, float addrspace(1)* undef
342 %y = load volatile float, float addrspace(1)* undef
343 %z = load volatile float, float addrspace(1)* undef
344 %cmp = icmp eq i32 %c, 0
345 %fneg.x = fsub float -0.0, %x
346 %select = select i1 %cmp, float %fneg.x, float %y
347 %add = fadd float %select, %z
348 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and the constant -1.0. The checks expect the
; select on raw X and the negated constant 1.0, followed by a subtract
; (Y - SELECT).
352 ; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
353 ; GCN: buffer_load_dword [[X:v[0-9]+]]
354 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
356 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
357 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
358 define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
359 %x = load volatile float, float addrspace(1)* undef
360 %y = load volatile float, float addrspace(1)* undef
361 %cmp = icmp eq i32 %c, 0
362 %fneg.x = fsub float -0.0, %x
363 %select = select i1 %cmp, float %fneg.x, float -1.0
364 %add = fadd float %select, %y
365 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and the constant 0x3FC45F3060000000. The checks
; expect the constant to be moved into a register in negated form (0xbe22f983)
; on both run lines, the select on raw X, and a subtract (Y - SELECT).
369 ; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
370 ; GCN: buffer_load_dword [[X:v[0-9]+]]
371 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
372 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
374 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
375 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
376 define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
377 %x = load volatile float, float addrspace(1)* undef
378 %y = load volatile float, float addrspace(1)* undef
379 %cmp = icmp eq i32 %c, 0
380 %fneg.x = fsub float -0.0, %x
381 %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
382 %add = fadd float %select, %y
383 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and the constant 0xBFC45F3060000000. The two
; run lines differ here: the tahiti checks expect the negated constant in a
; register (0x3e22f983), while the fiji checks expect it directly as the
; operand 0.15915494 of the select. Both expect a subtract (Y - SELECT).
387 ; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
388 ; GCN: buffer_load_dword [[X:v[0-9]+]]
389 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
390 ; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
392 ; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
393 ; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
395 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
396 define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
397 %x = load volatile float, float addrspace(1)* undef
398 %y = load volatile float, float addrspace(1)* undef
399 %cmp = icmp eq i32 %c, 0
400 %fneg.x = fsub float -0.0, %x
401 %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
402 %add = fadd float %select, %y
403 store volatile float %add, float addrspace(1)* undef
; Baseline with no fabs or fneg: select between the constants -2.0 and -1.0,
; then add x. The checks expect the negative constants to be selected as-is
; and a plain e32 add.
407 ; GCN-LABEL: {{^}}add_select_negk_negk_f32:
408 ; GCN: buffer_load_dword [[X:v[0-9]+]]
410 ; GCN: v_cmp_eq_u32_e64
411 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
412 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
413 define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
414 %x = load volatile float, float addrspace(1)* undef
415 %cmp = icmp eq i32 %c, 0
416 %select = select i1 %cmp, float -2.0, float -1.0
417 %add = fadd float %select, %x
418 store volatile float %add, float addrspace(1)* undef
; Like the negk/negk baseline, but with the constants -2048.0 and -4096.0. The
; checks expect both to be moved into registers (0xc5000000 and 0xc5800000)
; and selected unchanged, followed by a plain e32 add.
422 ; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
423 ; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
424 ; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
425 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
427 ; GCN: v_cmp_eq_u32_e64
428 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
429 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
430 define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
431 %x = load volatile float, float addrspace(1)* undef
432 %cmp = icmp eq i32 %c, 0
433 %select = select i1 %cmp, float -2048.0, float -4096.0
434 %add = fadd float %select, %x
435 store volatile float %add, float addrspace(1)* undef
; Negation applied to a select between the constants -2.0 and -1.0. The checks
; expect the select to keep the negative constants and the negation to become
; a subtract (X - SELECT).
439 ; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
440 ; GCN: buffer_load_dword [[X:v[0-9]+]]
442 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
443 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
444 define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
445 %x = load volatile float, float addrspace(1)* undef
446 %cmp = icmp eq i32 %c, 0
447 %select = select i1 %cmp, float -2.0, float -1.0
448 %fneg.x = fsub float -0.0, %select
449 %add = fadd float %fneg.x, %x
450 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the fneg/negk test: -1.0 is the true operand and the
; negated x the false operand. The checks expect a v_cmp_ne compare, the
; select on raw X and 1.0, and a subtract (Y - SELECT).
454 ; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
455 ; GCN: buffer_load_dword [[X:v[0-9]+]]
456 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
458 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
459 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
460 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
461 define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
462 %x = load volatile float, float addrspace(1)* undef
463 %y = load volatile float, float addrspace(1)* undef
464 %cmp = icmp eq i32 %c, 0
465 %fneg.x = fsub float -0.0, %x
466 %select = select i1 %cmp, float -1.0, float %fneg.x
467 %add = fadd float %select, %y
468 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and the constant 1.0. The checks expect the
; select on raw X and the negated constant -1.0, followed by a subtract
; (Y - SELECT).
472 ; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
473 ; GCN: buffer_load_dword [[X:v[0-9]+]]
474 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
476 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
477 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
478 define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
479 %x = load volatile float, float addrspace(1)* undef
480 %y = load volatile float, float addrspace(1)* undef
481 %cmp = icmp eq i32 %c, 0
482 %fneg.x = fsub float -0.0, %x
483 %select = select i1 %cmp, float %fneg.x, float 1.0
484 %add = fadd float %select, %y
485 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the fneg/posk test: 1.0 is the true operand and the
; negated x the false operand. The checks expect a v_cmp_ne compare, the
; select on raw X and -1.0, and a subtract (Y - SELECT).
489 ; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
490 ; GCN: buffer_load_dword [[X:v[0-9]+]]
491 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
493 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
494 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
495 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
496 define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
497 %x = load volatile float, float addrspace(1)* undef
498 %y = load volatile float, float addrspace(1)* undef
499 %cmp = icmp eq i32 %c, 0
500 %fneg.x = fsub float -0.0, %x
501 %select = select i1 %cmp, float 1.0, float %fneg.x
502 %add = fadd float %select, %y
503 store volatile float %add, float addrspace(1)* undef
; Select between -fabs(x) and fabs(y). The checks expect both operands to keep
; their modifiers on the v_cndmask_b32_e64 (|Y| and -|X|) and a plain e32 add.
507 ; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
508 ; GCN: buffer_load_dword [[X:v[0-9]+]]
509 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
510 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
512 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
513 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
514 define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
515 %x = load volatile float, float addrspace(1)* undef
516 %y = load volatile float, float addrspace(1)* undef
517 %z = load volatile float, float addrspace(1)* undef
518 %cmp = icmp eq i32 %c, 0
519 %fabs.x = call float @llvm.fabs.f32(float %x)
520 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
521 %fabs.y = call float @llvm.fabs.f32(float %y)
522 %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
523 %add = fadd float %select, %z
524 store volatile float %add, float addrspace(1)* undef
; Select between fabs(x) and -fabs(y). The checks expect both operands to keep
; their modifiers on the v_cndmask_b32_e64 (-|Y| and |X|) and a plain e32 add.
528 ; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
529 ; GCN: buffer_load_dword [[X:v[0-9]+]]
530 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
531 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
533 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
534 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
535 define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
536 %x = load volatile float, float addrspace(1)* undef
537 %y = load volatile float, float addrspace(1)* undef
538 %z = load volatile float, float addrspace(1)* undef
539 %cmp = icmp eq i32 %c, 0
540 %fabs.x = call float @llvm.fabs.f32(float %x)
541 %fabs.y = call float @llvm.fabs.f32(float %y)
542 %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
543 %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
544 %add = fadd float %select, %z
545 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and fabs(y). The checks expect each modifier to
; stay on its select operand (|Y| and -X) and a plain e32 add.
549 ; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
550 ; GCN: buffer_load_dword [[X:v[0-9]+]]
551 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
552 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
554 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
555 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
556 define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
557 %x = load volatile float, float addrspace(1)* undef
558 %y = load volatile float, float addrspace(1)* undef
559 %z = load volatile float, float addrspace(1)* undef
560 %cmp = icmp eq i32 %c, 0
561 %fneg.x = fsub float -0.000000e+00, %x
562 %fabs.y = call float @llvm.fabs.f32(float %y)
563 %select = select i1 %cmp, float %fneg.x, float %fabs.y
564 %add = fadd float %select, %z
565 store volatile float %add, float addrspace(1)* undef
; Select between fabs(x) and the negated y. The checks expect each modifier to
; stay on its select operand (-Y and |X|) and a plain e32 add.
569 ; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
570 ; GCN: buffer_load_dword [[X:v[0-9]+]]
571 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
572 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
574 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
575 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
576 define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
577 %x = load volatile float, float addrspace(1)* undef
578 %y = load volatile float, float addrspace(1)* undef
579 %z = load volatile float, float addrspace(1)* undef
580 %cmp = icmp eq i32 %c, 0
581 %fabs.x = call float @llvm.fabs.f32(float %x)
582 %fneg.y = fsub float -0.000000e+00, %y
583 %select = select i1 %cmp, float %fabs.x, float %fneg.y
584 %add = fadd float %select, %z
585 store volatile float %add, float addrspace(1)* undef
; Select between the negated x and -fabs(y): both operands carry a negation.
; The checks expect the negation to be taken off both operands, leaving |Y|
; and X on the select, and applied once as a subtract (Z - SELECT).
589 ; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
590 ; GCN: buffer_load_dword [[X:v[0-9]+]]
591 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
592 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
594 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
595 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
596 define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
597 %x = load volatile float, float addrspace(1)* undef
598 %y = load volatile float, float addrspace(1)* undef
599 %z = load volatile float, float addrspace(1)* undef
600 %cmp = icmp eq i32 %c, 0
601 %fneg.x = fsub float -0.000000e+00, %x
602 %fabs.y = call float @llvm.fabs.f32(float %y)
603 %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
604 %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
605 %add = fadd float %select, %z
606 store volatile float %add, float addrspace(1)* undef
; Both select operands carry a negation: the true operand is the negated y and
; the false operand is -fabs(x) (note the operand order in the IR select). The
; checks expect |X| and Y on the select and a single subtract (Z - SELECT).
610 ; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
611 ; GCN: buffer_load_dword [[X:v[0-9]+]]
612 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
613 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
615 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
616 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
617 define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
618 %x = load volatile float, float addrspace(1)* undef
619 %y = load volatile float, float addrspace(1)* undef
620 %z = load volatile float, float addrspace(1)* undef
621 %cmp = icmp eq i32 %c, 0
622 %fabs.x = call float @llvm.fabs.f32(float %x)
623 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
624 %fneg.y = fsub float -0.000000e+00, %y
625 %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
626 %add = fadd float %select, %z
627 store volatile float %add, float addrspace(1)* undef
; Multiply variant: select between -fabs(x) and the constant 4.0, then fmul
; with y (the product is held in the SSA value named %add). The checks expect
; the select on |X| and the negated constant -4.0, with the negation applied
; as a source modifier on the multiply.
631 ; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
632 ; GCN: buffer_load_dword [[X:v[0-9]+]]
633 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
635 ; GCN-DAG: v_cmp_eq_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
636 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
637 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
638 define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
639 %x = load volatile float, float addrspace(1)* undef
640 %y = load volatile float, float addrspace(1)* undef
641 %cmp = icmp eq i32 %c, 0
642 %fabs.x = call float @llvm.fabs.f32(float %x)
643 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
644 %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
645 %add = fmul float %select, %y
646 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the negfabs/posk multiply test: 4.0 is the true
; operand and -fabs(x) the false operand (the product is held in the SSA value
; named %add). The checks expect a v_cmp_ne compare, the select on -4.0 and
; |X|, and the negation as a source modifier on the multiply.
650 ; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
651 ; GCN: buffer_load_dword [[X:v[0-9]+]]
652 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
654 ; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
655 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
656 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
657 define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
658 %x = load volatile float, float addrspace(1)* undef
659 %y = load volatile float, float addrspace(1)* undef
660 %cmp = icmp eq i32 %c, 0
661 %fabs.x = call float @llvm.fabs.f32(float %x)
662 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
663 %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
664 %add = fmul float %select, %y
665 store volatile float %add, float addrspace(1)* undef
; Multiply variant: select between -fabs(x) and the constant -4.0, then fmul
; with y (the product is held in the SSA value named %add). The checks expect
; the select on raw X and 4.0, with both neg and |abs| applied as source
; modifiers on the multiply.
669 ; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
670 ; GCN: buffer_load_dword [[X:v[0-9]+]]
671 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
673 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
674 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
675 define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
676 %x = load volatile float, float addrspace(1)* undef
677 %y = load volatile float, float addrspace(1)* undef
678 %cmp = icmp eq i32 %c, 0
679 %fabs.x = call float @llvm.fabs.f32(float %x)
680 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
681 %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
682 %add = fmul float %select, %y
683 store volatile float %add, float addrspace(1)* undef
; Operand-swapped form of the negfabs/negk multiply test: -4.0 is the true
; operand and -fabs(x) the false operand (the product is held in the SSA value
; named %add). The checks expect a v_cmp_ne compare, the select on raw X and
; 4.0, and both neg and |abs| as source modifiers on the multiply.
687 ; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
688 ; GCN: buffer_load_dword [[X:v[0-9]+]]
689 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
691 ; GCN: v_cmp_ne_u32_e64 vcc
692 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
693 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
694 define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
695 %x = load volatile float, float addrspace(1)* undef
696 %y = load volatile float, float addrspace(1)* undef
697 %cmp = icmp eq i32 %c, 0
698 %fabs.x = call float @llvm.fabs.f32(float %x)
699 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
700 %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
701 %add = fmul float %select, %y
702 store volatile float %add, float addrspace(1)* undef
706 ; --------------------------------------------------------------------------------
707 ; Don't fold if fneg can fold into the source
708 ; --------------------------------------------------------------------------------
; The negated select operand is the result of x + 4.0, and the select result
; is stored directly. The checks expect the negation to be absorbed into the
; source operation as a subtract (-4.0 - X), the select to use 2.0 unchanged,
; and the store to follow the select immediately.
; y is loaded (and matched by a check) but does not take part in the
; computation.
710 ; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
711 ; GCN: buffer_load_dword [[X:v[0-9]+]]
712 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
714 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
715 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
716 ; GCN-NEXT: buffer_store_dword [[SELECT]]
717 define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
718 %x = load volatile float, float addrspace(1)* undef
719 %y = load volatile float, float addrspace(1)* undef
720 %cmp = icmp eq i32 %c, 0
721 %add = fadd float %x, 4.0
722 %fneg = fsub float -0.0, %add
723 %select = select i1 %cmp, float %fneg, float 2.0
724 store volatile float %select, float addrspace(1)* undef
; The negated select operand is the result of x - 4.0 (held in the SSA value
; named %add). The checks expect the negation to be absorbed by swapping the
; subtract to 4.0 - X, the select to use 2.0 unchanged, and the store to
; follow the select immediately.
728 ; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
729 ; GCN: buffer_load_dword [[X:v[0-9]+]]
731 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
732 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
733 ; GCN-NEXT: buffer_store_dword [[SELECT]]
734 define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
735 %x = load volatile float, float addrspace(1)* undef
736 %cmp = icmp eq i32 %c, 0
737 %add = fsub float %x, 4.0
738 %fneg = fsub float -0.0, %add
739 %select = select i1 %cmp, float %fneg, float 2.0
740 store volatile float %select, float addrspace(1)* undef
; The negated select operand is the result of x * 4.0. The checks expect the
; negation to be absorbed into the multiply constant (-4.0 * X), the select to
; use 2.0 unchanged, and the store to follow the select immediately.
744 ; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
745 ; GCN: buffer_load_dword [[X:v[0-9]+]]
747 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
748 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
749 ; GCN-NEXT: buffer_store_dword [[SELECT]]
750 define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
751 %x = load volatile float, float addrspace(1)* undef
752 %cmp = icmp eq i32 %c, 0
753 %mul = fmul float %x, 4.0
754 %fneg = fsub float -0.0, %mul
755 %select = select i1 %cmp, float %fneg, float 2.0
756 store volatile float %select, float addrspace(1)* undef
; The negated select operand is the result of llvm.fma(x, 4.0, z). The checks
; expect the negation to be absorbed into the fma operands (X, -4.0, -Z), the
; select to use 2.0 unchanged, and the store to follow the select immediately.
760 ; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
761 ; GCN: buffer_load_dword [[X:v[0-9]+]]
762 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
764 ; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
765 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
766 ; GCN-NEXT: buffer_store_dword [[SELECT]]
767 define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
768 %x = load volatile float, float addrspace(1)* undef
769 %z = load volatile float, float addrspace(1)* undef
770 %cmp = icmp eq i32 %c, 0
771 %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
772 %fneg = fsub float -0.0, %fma
773 %select = select i1 %cmp, float %fneg, float 2.0
774 store volatile float %select, float addrspace(1)* undef
; The negated select operand is the result of llvm.fmuladd(x, 4.0, z).
; NOTE(review): the checks here do not match the multiply-add instruction
; itself; they only match a select with the constant 2.0 and the store that
; immediately follows it.
778 ; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
779 ; GCN: buffer_load_dword [[X:v[0-9]+]]
780 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
782 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
783 ; GCN-NEXT: buffer_store_dword [[SELECT]]
784 define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
785 %x = load volatile float, float addrspace(1)* undef
786 %z = load volatile float, float addrspace(1)* undef
787 %cmp = icmp eq i32 %c, 0
788 %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
789 %fneg = fsub float -0.0, %fmad
790 %select = select i1 %cmp, float %fneg, float 2.0
791 store volatile float %select, float addrspace(1)* undef
; The negated select operand is the result of llvm.amdgcn.rcp(x). The checks
; record the current output, where the negation is not absorbed into the rcp:
; the select uses the negated constant -2.0 and the raw RCP value, and an
; explicit v_xor_b32 with 0x80000000 negates the selected value before the
; store (see the FIXME).
; Only x is loaded; the kernel has no other float input.
795 ; FIXME: This one should fold to rcp
796 ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
797 ; GCN: buffer_load_dword [[X:v[0-9]+]]
799 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
800 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
801 ; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
802 ; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
803 define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
804 %x = load volatile float, float addrspace(1)* undef
806 %cmp = icmp eq i32 %c, 0
807 %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
808 %fneg = fsub float -0.0, %rcp
809 %select = select i1 %cmp, float %fneg, float 2.0
810 store volatile float %select, float addrspace(1)* undef
; Intrinsic declarations for the kernels in this file.
814 declare float @llvm.fabs.f32(float) #1
815 declare float @llvm.fma.f32(float, float, float) #1
816 declare float @llvm.fmuladd.f32(float, float, float) #1
817 declare float @llvm.amdgcn.rcp.f32(float) #1
; NOTE(review): the two legacy intrinsics below are not called by any kernel
; visible in this file.
818 declare float @llvm.amdgcn.rcp.legacy(float) #1
819 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
; Attribute group #0 is attached to every kernel definition, #1 to the
; intrinsic declarations above.
821 attributes #0 = { nounwind }
822 attributes #1 = { nounwind readnone }