1 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; Both select operands are fabs of a loaded value. The checks expect the select
; to operate on the raw loads and a single |...| source modifier on the
; v_add_f32 that consumes the select.
4 ; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
5 ; GCN: buffer_load_dword [[X:v[0-9]+]]
6 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
7 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
9 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
10 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
11 define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
12 %x = load volatile float, ptr addrspace(1) undef
13 %y = load volatile float, ptr addrspace(1) undef
14 %z = load volatile float, ptr addrspace(1) undef
15 %cmp = icmp eq i32 %c, 0
16 %fabs.x = call float @llvm.fabs.f32(float %x)
17 %fabs.y = call float @llvm.fabs.f32(float %y)
18 %select = select i1 %cmp, float %fabs.x, float %fabs.y
19 %add = fadd float %select, %z
20 store float %add, ptr addrspace(1) undef
; Same fabs/fabs select, but fabs(x) (the true operand) also feeds a second
; fadd. The checks expect |SELECT| on the first add and |X| folded as a source
; modifier into the second add.
24 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
25 ; GCN: buffer_load_dword [[X:v[0-9]+]]
26 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
27 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
28 ; GCN: buffer_load_dword [[W:v[0-9]+]]
30 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
31 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
32 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
33 define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
34 %x = load volatile float, ptr addrspace(1) undef
35 %y = load volatile float, ptr addrspace(1) undef
36 %z = load volatile float, ptr addrspace(1) undef
37 %w = load volatile float, ptr addrspace(1) undef
38 %cmp = icmp eq i32 %c, 0
39 %fabs.x = call float @llvm.fabs.f32(float %x)
40 %fabs.y = call float @llvm.fabs.f32(float %y)
41 %select = select i1 %cmp, float %fabs.x, float %fabs.y
42 %add0 = fadd float %select, %z
43 %add1 = fadd float %fabs.x, %w
44 store volatile float %add0, ptr addrspace(1) undef
45 store volatile float %add1, ptr addrspace(1) undef
; fabs(x) is used by the select and is also stored directly. The checks expect
; the stored value to be produced by a v_and_b32 with 0x7fffffff, while the
; select/add pair still uses the |SELECT| source modifier.
49 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
50 ; GCN: buffer_load_dword [[X:v[0-9]+]]
51 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
52 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
54 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
55 ; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
56 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
58 ; GCN: buffer_store_dword [[ADD]]
59 ; GCN: buffer_store_dword [[X_ABS]]
60 define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
61 %x = load volatile float, ptr addrspace(1) undef
62 %y = load volatile float, ptr addrspace(1) undef
63 %z = load volatile float, ptr addrspace(1) undef
64 %cmp = icmp eq i32 %c, 0
65 %fabs.x = call float @llvm.fabs.f32(float %x)
66 %fabs.y = call float @llvm.fabs.f32(float %y)
67 %select = select i1 %cmp, float %fabs.x, float %fabs.y
68 %add0 = fadd float %select, %z
69 store volatile float %add0, ptr addrspace(1) undef
70 store volatile float %fabs.x, ptr addrspace(1) undef
; Same fabs/fabs select, but here fabs(y) (the false operand) has the extra
; fadd user. The checks expect |SELECT| on the first add and |Y| folded into
; the second add.
74 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
75 ; GCN: buffer_load_dword [[X:v[0-9]+]]
76 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
77 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
78 ; GCN: buffer_load_dword [[W:v[0-9]+]]
80 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
81 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
82 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
83 define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
84 %x = load volatile float, ptr addrspace(1) undef
85 %y = load volatile float, ptr addrspace(1) undef
86 %z = load volatile float, ptr addrspace(1) undef
87 %w = load volatile float, ptr addrspace(1) undef
88 %cmp = icmp eq i32 %c, 0
89 %fabs.x = call float @llvm.fabs.f32(float %x)
90 %fabs.y = call float @llvm.fabs.f32(float %y)
91 %select = select i1 %cmp, float %fabs.x, float %fabs.y
92 %add0 = fadd float %select, %z
93 %add1 = fadd float %fabs.y, %w
94 store volatile float %add0, ptr addrspace(1) undef
95 store volatile float %add1, ptr addrspace(1) undef
; Only the true operand is a fabs; the false operand is a plain loaded value.
; The checks expect |X| as a source modifier on v_cndmask_b32_e64 and an
; unmodified v_add_f32.
99 ; GCN-LABEL: {{^}}add_select_fabs_var_f32:
100 ; GCN: buffer_load_dword [[X:v[0-9]+]]
101 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
102 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
104 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
105 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
106 define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
107 %x = load volatile float, ptr addrspace(1) undef
108 %y = load volatile float, ptr addrspace(1) undef
109 %z = load volatile float, ptr addrspace(1) undef
110 %cmp = icmp eq i32 %c, 0
111 %fabs.x = call float @llvm.fabs.f32(float %x)
112 %select = select i1 %cmp, float %fabs.x, float %y
113 %add = fadd float %select, %z
114 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fabs(x), -1.0) feeding an fadd. The checks expect the constant
; -1.0 and |X| as operands of v_cndmask_b32_e64 and an unmodified v_add_f32.
118 ; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
119 ; GCN: buffer_load_dword [[X:v[0-9]+]]
120 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
122 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
123 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
124 define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
125 %x = load volatile float, ptr addrspace(1) undef
126 %y = load volatile float, ptr addrspace(1) undef
127 %cmp = icmp eq i32 %c, 0
128 %fabs = call float @llvm.fabs.f32(float %x)
129 %select = select i1 %cmp, float %fabs, float -1.0
130 %add = fadd float %select, %y
131 store volatile float %add, ptr addrspace(1) undef
; fabs applied to a select between two negative constants (-2.0 and -1.0).
; The checks record the current output: the select keeps the negative constants
; and the fabs appears as |SELECT| on the v_add_f32.
135 ; FIXME: fabs should fold away
136 ; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
137 ; GCN: buffer_load_dword [[X:v[0-9]+]]
139 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
140 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
141 define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
142 %x = load volatile float, ptr addrspace(1) undef
143 %cmp = icmp eq i32 %c, 0
144 %select = select i1 %cmp, float -2.0, float -1.0
145 %fabs = call float @llvm.fabs.f32(float %select)
146 %add = fadd float %fabs, %x
147 store volatile float %add, ptr addrspace(1) undef
; Select between two positive constants (2.0 and 1.0) with no fabs or fneg in
; the IR. The checks expect the constants directly in v_cndmask_b32_e64 and an
; unmodified v_add_f32.
151 ; GCN-LABEL: {{^}}add_select_posk_posk_f32:
152 ; GCN: buffer_load_dword [[X:v[0-9]+]]
154 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
155 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
156 define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
157 %x = load volatile float, ptr addrspace(1) undef
158 %cmp = icmp eq i32 %c, 0
159 %select = select i1 %cmp, float 2.0, float 1.0
160 %add = fadd float %select, %x
161 store volatile float %add, ptr addrspace(1) undef
; The constant -1.0 is the true operand and fabs(x) the false operand. The
; checks expect an s_cmp_lg_u32 compare and a v_cndmask_b32_e64 taking -1.0
; and |X|, conditioned on the captured s_cselect_b64 result.
165 ; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
166 ; GCN: buffer_load_dword [[X:v[0-9]+]]
167 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
169 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
170 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
171 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
172 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
173 define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
174 %x = load volatile float, ptr addrspace(1) undef
175 %y = load volatile float, ptr addrspace(1) undef
176 %cmp = icmp eq i32 %c, 0
177 %fabs = call float @llvm.fabs.f32(float %x)
178 %select = select i1 %cmp, float -1.0, float %fabs
179 %add = fadd float %select, %y
180 store volatile float %add, ptr addrspace(1) undef
; Like the -1.0 / fabs(x) select, but with the constant -1024.0. The checks
; expect the constant to be materialized by v_mov_b32 (0xc4800000) and passed
; to v_cndmask_b32_e64 as a register alongside |X|.
184 ; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
185 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
186 ; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
187 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
189 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
190 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
191 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
192 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
193 define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
194 %x = load volatile float, ptr addrspace(1) undef
195 %y = load volatile float, ptr addrspace(1) undef
196 %cmp = icmp eq i32 %c, 0
197 %fabs = call float @llvm.fabs.f32(float %x)
198 %select = select i1 %cmp, float -1024.0, float %fabs
199 %add = fadd float %select, %y
200 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fabs(x), 1.0) feeding an fadd. The checks expect 1.0 and |X|
; as operands of v_cndmask_b32_e64, conditioned on an SGPR pair, and an
; unmodified v_add_f32.
204 ; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
205 ; GCN: buffer_load_dword [[X:v[0-9]+]]
206 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
208 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
209 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
210 define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
211 %x = load volatile float, ptr addrspace(1) undef
212 %y = load volatile float, ptr addrspace(1) undef
214 %cmp = icmp eq i32 %c, 0
215 %fabs = call float @llvm.fabs.f32(float %x)
216 %select = select i1 %cmp, float %fabs, float 1.0
217 %add = fadd float %select, %y
218 store volatile float %add, ptr addrspace(1) undef
; The constant 1.0 is the true operand and fabs(x) the false operand. The
; checks expect an s_cmp_lg_u32 compare and a v_cndmask_b32_e64 taking 1.0 and
; |X|. The condition operand is tied to the captured s_cselect_b64 result, as
; in the other constant/fabs select tests in this file, instead of accepting
; any SGPR pair.
222 ; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
223 ; GCN: buffer_load_dword [[X:v[0-9]+]]
224 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
226 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
227 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
228 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, [[VCC]]
229 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
230 define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
231 %x = load volatile float, ptr addrspace(1) undef
232 %y = load volatile float, ptr addrspace(1) undef
233 %cmp = icmp eq i32 %c, 0
234 %fabs = call float @llvm.fabs.f32(float %x)
235 %select = select i1 %cmp, float 1.0, float %fabs
236 %add = fadd float %select, %y
237 store volatile float %add, ptr addrspace(1) undef
; Both select operands are negations (fsub -0.0, v) of loaded values. The
; checks expect the select to operate on the raw loads and the fadd to become
; a v_sub_f32 of Z minus the select result.
241 ; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
242 ; GCN: buffer_load_dword [[X:v[0-9]+]]
243 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
244 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
246 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
247 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
248 define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
249 %x = load volatile float, ptr addrspace(1) undef
250 %y = load volatile float, ptr addrspace(1) undef
251 %z = load volatile float, ptr addrspace(1) undef
252 %cmp = icmp eq i32 %c, 0
253 %fneg.x = fsub float -0.0, %x
254 %fneg.y = fsub float -0.0, %y
255 %select = select i1 %cmp, float %fneg.x, float %fneg.y
256 %add = fadd float %select, %z
257 store volatile float %add, ptr addrspace(1) undef
; Same fneg/fneg select, but the negated x (the true operand) also feeds a
; second fadd. The checks expect two v_sub_f32: Z minus the select result and
; W minus X.
261 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
262 ; GCN: buffer_load_dword [[X:v[0-9]+]]
263 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
264 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
265 ; GCN: buffer_load_dword [[W:v[0-9]+]]
267 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
268 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
269 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
270 define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
271 %x = load volatile float, ptr addrspace(1) undef
272 %y = load volatile float, ptr addrspace(1) undef
273 %z = load volatile float, ptr addrspace(1) undef
274 %w = load volatile float, ptr addrspace(1) undef
275 %cmp = icmp eq i32 %c, 0
276 %fneg.x = fsub float -0.0, %x
277 %fneg.y = fsub float -0.0, %y
278 %select = select i1 %cmp, float %fneg.x, float %fneg.y
279 %add0 = fadd float %select, %z
280 %add1 = fadd float %fneg.x, %w
281 store volatile float %add0, ptr addrspace(1) undef
282 store volatile float %add1, ptr addrspace(1) undef
; The negated x is used by the select and is also stored directly. The checks
; expect the stored value to come from a v_xor_b32 with 0x80000000, while the
; select still uses the raw loads and the add becomes a v_sub_f32.
286 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
287 ; GCN: buffer_load_dword [[X:v[0-9]+]]
288 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
289 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
291 ; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
292 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
293 ; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
295 ; GCN: buffer_store_dword [[ADD]]
296 ; GCN: buffer_store_dword [[NEG_X]]
297 define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
298 %x = load volatile float, ptr addrspace(1) undef
299 %y = load volatile float, ptr addrspace(1) undef
300 %z = load volatile float, ptr addrspace(1) undef
301 %cmp = icmp eq i32 %c, 0
302 %fneg.x = fsub float -0.0, %x
303 %fneg.y = fsub float -0.0, %y
304 %select = select i1 %cmp, float %fneg.x, float %fneg.y
305 %add0 = fadd float %select, %z
306 store volatile float %add0, ptr addrspace(1) undef
307 store volatile float %fneg.x, ptr addrspace(1) undef
; Same fneg/fneg select, but here the negated y (the false operand) has the
; extra fadd user. The checks expect two v_sub_f32: Z minus the select result
; and W minus Y.
311 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
312 ; GCN: buffer_load_dword [[X:v[0-9]+]]
313 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
314 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
315 ; GCN: buffer_load_dword [[W:v[0-9]+]]
317 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
318 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
319 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
320 define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
321 %x = load volatile float, ptr addrspace(1) undef
322 %y = load volatile float, ptr addrspace(1) undef
323 %z = load volatile float, ptr addrspace(1) undef
324 %w = load volatile float, ptr addrspace(1) undef
325 %cmp = icmp eq i32 %c, 0
326 %fneg.x = fsub float -0.0, %x
327 %fneg.y = fsub float -0.0, %y
328 %select = select i1 %cmp, float %fneg.x, float %fneg.y
329 %add0 = fadd float %select, %z
330 %add1 = fadd float %fneg.y, %w
331 store volatile float %add0, ptr addrspace(1) undef
332 store volatile float %add1, ptr addrspace(1) undef
; Only the true operand is negated; the false operand is a plain loaded value.
; The checks expect -X as a source modifier on v_cndmask_b32_e64 and the add
; to stay a v_add_f32.
336 ; GCN-LABEL: {{^}}add_select_fneg_var_f32:
337 ; GCN: buffer_load_dword [[X:v[0-9]+]]
338 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
339 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
341 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
342 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
343 define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
344 %x = load volatile float, ptr addrspace(1) undef
345 %y = load volatile float, ptr addrspace(1) undef
346 %z = load volatile float, ptr addrspace(1) undef
347 %cmp = icmp eq i32 %c, 0
348 %fneg.x = fsub float -0.0, %x
349 %select = select i1 %cmp, float %fneg.x, float %y
350 %add = fadd float %select, %z
351 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -x, -1.0) feeding an fadd. The checks expect the select to
; use X and the sign-flipped constant 1.0, with the add turned into a
; v_sub_f32 of Y minus the select result.
355 ; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
356 ; GCN: buffer_load_dword [[X:v[0-9]+]]
357 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
359 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
360 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
361 define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
362 %x = load volatile float, ptr addrspace(1) undef
363 %y = load volatile float, ptr addrspace(1) undef
364 %cmp = icmp eq i32 %c, 0
365 %fneg.x = fsub float -0.0, %x
366 %select = select i1 %cmp, float %fneg.x, float -1.0
367 %add = fadd float %select, %y
368 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fneg x, K) where K is the positive constant
; 0x3FC45F3060000000. The checks expect a v_mov_b32 of 0xbe22f983 (sign bit
; set) as the select constant, X unmodified, and a v_sub_f32 of Y minus the
; select result.
372 ; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
373 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
374 ; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
376 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
377 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
378 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
379 define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
380 %x = load volatile float, ptr addrspace(1) undef
381 %y = load volatile float, ptr addrspace(1) undef
382 %cmp = icmp eq i32 %c, 0
383 %fneg.x = fneg float %x
384 %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
385 %add = fadd float %select, %y
386 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fneg x, K) where K is the negative constant
; 0xBFC45F3060000000. The select is expected to use the positive constant:
; the tahiti run expects it materialized by v_mov_b32 (0x3e22f983), the fiji
; run expects 0.15915494 directly as a v_cndmask_b32 operand. Both expect a
; v_sub_f32 of Y minus the select result.
390 ; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
391 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
392 ; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
393 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
395 ; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
396 ; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
398 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
399 define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
400 %x = load volatile float, ptr addrspace(1) undef
401 %y = load volatile float, ptr addrspace(1) undef
402 %cmp = icmp eq i32 %c, 0
403 %fneg.x = fneg float %x
404 %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
405 %add = fadd float %select, %y
406 store volatile float %add, ptr addrspace(1) undef
; Select between two negative constants (-2.0 and -1.0) with no fneg or fabs
; in the IR. The checks expect the constants unchanged in v_cndmask_b32_e64
; and an unmodified v_add_f32.
410 ; GCN-LABEL: {{^}}add_select_negk_negk_f32:
411 ; GCN: buffer_load_dword [[X:v[0-9]+]]
414 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
415 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
416 define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
417 %x = load volatile float, ptr addrspace(1) undef
418 %cmp = icmp eq i32 %c, 0
419 %select = select i1 %cmp, float -2.0, float -1.0
420 %add = fadd float %select, %x
421 store volatile float %add, ptr addrspace(1) undef
; Select between the constants -2048.0 and -4096.0. The checks expect both to
; be materialized by v_mov_b32 (0xc5000000 and 0xc5800000) and selected as
; registers, followed by an unmodified v_add_f32.
425 ; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
426 ; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
427 ; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
428 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
431 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
432 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
433 define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
434 %x = load volatile float, ptr addrspace(1) undef
435 %cmp = icmp eq i32 %c, 0
436 %select = select i1 %cmp, float -2048.0, float -4096.0
437 %add = fadd float %select, %x
438 store volatile float %add, ptr addrspace(1) undef
; Negation (fsub -0.0, v) of a select between -2.0 and -1.0. The checks expect
; the negation folded into the constants (1.0 and 2.0 in the v_cndmask_b32)
; and a plain v_add_f32.
442 ; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
443 ; GCN: buffer_load_dword [[X:v[0-9]+]]
445 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
446 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
447 define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
448 %x = load volatile float, ptr addrspace(1) undef
449 %cmp = icmp eq i32 %c, 0
450 %select = select i1 %cmp, float -2.0, float -1.0
451 %fneg.x = fsub float -0.0, %select
452 %add = fadd float %fneg.x, %x
453 store volatile float %add, ptr addrspace(1) undef
; The constant -1.0 is the true operand and the negated x the false operand.
; The checks expect an s_cmp_lg_u32 compare, a select of X against the
; sign-flipped constant 1.0, and a v_sub_f32 of Y minus the select result.
457 ; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
458 ; GCN: buffer_load_dword [[X:v[0-9]+]]
459 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
461 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
462 ; GCN: s_cselect_b64 vcc, -1, 0
463 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
464 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
465 define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
466 %x = load volatile float, ptr addrspace(1) undef
467 %y = load volatile float, ptr addrspace(1) undef
468 %cmp = icmp eq i32 %c, 0
469 %fneg.x = fsub float -0.0, %x
470 %select = select i1 %cmp, float -1.0, float %fneg.x
471 %add = fadd float %select, %y
472 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -x, 1.0) feeding an fadd. The checks expect the select to use
; X and the sign-flipped constant -1.0, with the add turned into a v_sub_f32
; of Y minus the select result.
476 ; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
477 ; GCN: buffer_load_dword [[X:v[0-9]+]]
478 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
480 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
481 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
482 define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
483 %x = load volatile float, ptr addrspace(1) undef
484 %y = load volatile float, ptr addrspace(1) undef
485 %cmp = icmp eq i32 %c, 0
486 %fneg.x = fsub float -0.0, %x
487 %select = select i1 %cmp, float %fneg.x, float 1.0
488 %add = fadd float %select, %y
489 store volatile float %add, ptr addrspace(1) undef
; The constant 1.0 is the true operand and the negated x the false operand.
; The checks expect an s_cmp_lg_u32 compare, a select of X against the
; sign-flipped constant -1.0, and a v_sub_f32 of Y minus the select result.
493 ; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
494 ; GCN: buffer_load_dword [[X:v[0-9]+]]
495 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
497 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
498 ; GCN: s_cselect_b64 vcc, -1, 0
499 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
500 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
501 define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
502 %x = load volatile float, ptr addrspace(1) undef
503 %y = load volatile float, ptr addrspace(1) undef
504 %cmp = icmp eq i32 %c, 0
505 %fneg.x = fsub float -0.0, %x
506 %select = select i1 %cmp, float 1.0, float %fneg.x
507 %add = fadd float %select, %y
508 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -fabs(x), fabs(y)) feeding an fadd. The checks expect both
; operands to carry their modifiers on v_cndmask_b32_e64 (-|X| and |Y|) and an
; unmodified v_add_f32.
512 ; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
513 ; GCN: buffer_load_dword [[X:v[0-9]+]]
514 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
515 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
517 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
518 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
519 define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
520 %x = load volatile float, ptr addrspace(1) undef
521 %y = load volatile float, ptr addrspace(1) undef
522 %z = load volatile float, ptr addrspace(1) undef
523 %cmp = icmp eq i32 %c, 0
524 %fabs.x = call float @llvm.fabs.f32(float %x)
525 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
526 %fabs.y = call float @llvm.fabs.f32(float %y)
527 %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
528 %add = fadd float %select, %z
529 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fabs(x), -fabs(y)) feeding an fadd. The checks expect both
; operands to carry their modifiers on v_cndmask_b32_e64 (|X| and -|Y|) and an
; unmodified v_add_f32.
533 ; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
534 ; GCN: buffer_load_dword [[X:v[0-9]+]]
535 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
536 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
538 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
539 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
540 define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
541 %x = load volatile float, ptr addrspace(1) undef
542 %y = load volatile float, ptr addrspace(1) undef
543 %z = load volatile float, ptr addrspace(1) undef
544 %cmp = icmp eq i32 %c, 0
545 %fabs.x = call float @llvm.fabs.f32(float %x)
546 %fabs.y = call float @llvm.fabs.f32(float %y)
547 %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
548 %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
549 %add = fadd float %select, %z
550 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -x, fabs(y)) feeding an fadd. The checks expect the mixed
; modifiers to stay on v_cndmask_b32_e64 (-X and |Y|) and an unmodified
; v_add_f32.
554 ; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
555 ; GCN: buffer_load_dword [[X:v[0-9]+]]
556 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
557 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
559 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
560 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
561 define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
562 %x = load volatile float, ptr addrspace(1) undef
563 %y = load volatile float, ptr addrspace(1) undef
564 %z = load volatile float, ptr addrspace(1) undef
565 %cmp = icmp eq i32 %c, 0
566 %fneg.x = fsub float -0.000000e+00, %x
567 %fabs.y = call float @llvm.fabs.f32(float %y)
568 %select = select i1 %cmp, float %fneg.x, float %fabs.y
569 %add = fadd float %select, %z
570 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, fabs(x), -y) feeding an fadd. The checks expect the mixed
; modifiers to stay on v_cndmask_b32_e64 (|X| and -Y) and an unmodified
; v_add_f32.
574 ; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
575 ; GCN: buffer_load_dword [[X:v[0-9]+]]
576 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
577 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
579 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
580 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
581 define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
582 %x = load volatile float, ptr addrspace(1) undef
583 %y = load volatile float, ptr addrspace(1) undef
584 %z = load volatile float, ptr addrspace(1) undef
585 %cmp = icmp eq i32 %c, 0
586 %fabs.x = call float @llvm.fabs.f32(float %x)
587 %fneg.y = fsub float -0.000000e+00, %y
588 %select = select i1 %cmp, float %fabs.x, float %fneg.y
589 %add = fadd float %select, %z
590 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -x, -fabs(y)) feeding an fadd. Both operands are negated, and
; the checks expect the select to use X and |Y| without the negation, with the
; add turned into a v_sub_f32 of Z minus the select result.
594 ; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
595 ; GCN: buffer_load_dword [[X:v[0-9]+]]
596 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
597 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
599 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
600 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
601 define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
602 %x = load volatile float, ptr addrspace(1) undef
603 %y = load volatile float, ptr addrspace(1) undef
604 %z = load volatile float, ptr addrspace(1) undef
605 %cmp = icmp eq i32 %c, 0
606 %fneg.x = fsub float -0.000000e+00, %x
607 %fabs.y = call float @llvm.fabs.f32(float %y)
608 %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
609 %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
610 %add = fadd float %select, %z
611 store volatile float %add, ptr addrspace(1) undef
; select(c == 0, -y, -fabs(x)) feeding an fadd. Both operands are negated, and
; the checks expect the select to use Y and |X| without the negation, with the
; add turned into a v_sub_f32 of Z minus the select result.
615 ; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
616 ; GCN: buffer_load_dword [[X:v[0-9]+]]
617 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
618 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
620 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
621 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
622 define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
623 %x = load volatile float, ptr addrspace(1) undef
624 %y = load volatile float, ptr addrspace(1) undef
625 %z = load volatile float, ptr addrspace(1) undef
626 %cmp = icmp eq i32 %c, 0
627 %fabs.x = call float @llvm.fabs.f32(float %x)
628 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
629 %fneg.y = fsub float -0.000000e+00, %y
630 %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
631 %add = fadd float %select, %z
632 store volatile float %add, ptr addrspace(1) undef
; fmul user of select(c == 0, -fabs(x), 4.0); the product is named %add in the
; IR. The checks expect an s_cmp_eq_u32 compare, -|X| and 4.0 as operands of
; v_cndmask_b32_e64, and an unmodified v_mul_f32.
636 ; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
637 ; GCN: buffer_load_dword [[X:v[0-9]+]]
638 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
640 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
641 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
642 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
643 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
644 define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
645 %x = load volatile float, ptr addrspace(1) undef
646 %y = load volatile float, ptr addrspace(1) undef
647 %cmp = icmp eq i32 %c, 0
648 %fabs.x = call float @llvm.fabs.f32(float %x)
649 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
650 %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
651 %add = fmul float %select, %y
652 store volatile float %add, ptr addrspace(1) undef
; fmul user of select(c == 0, 4.0, -fabs(x)); the product is named %add in the
; IR. The checks expect an s_cmp_lg_u32 compare, -|X| and 4.0 as operands of
; v_cndmask_b32_e64, and an unmodified v_mul_f32.
656 ; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
657 ; GCN: buffer_load_dword [[X:v[0-9]+]]
658 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
660 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
661 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
662 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
663 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
664 define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
665 %x = load volatile float, ptr addrspace(1) undef
666 %y = load volatile float, ptr addrspace(1) undef
667 %cmp = icmp eq i32 %c, 0
668 %fabs.x = call float @llvm.fabs.f32(float %x)
669 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
670 %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
671 %add = fmul float %select, %y
672 store volatile float %add, ptr addrspace(1) undef
; fmul user of select(c == 0, -fabs(x), -4.0); the product is named %add in
; the IR. The checks expect -|X| and -4.0 as operands of v_cndmask_b32_e64,
; conditioned on an SGPR pair, and an unmodified v_mul_f32.
676 ; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
677 ; GCN: buffer_load_dword [[X:v[0-9]+]]
678 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
680 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
681 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
682 define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
683 %x = load volatile float, ptr addrspace(1) undef
684 %y = load volatile float, ptr addrspace(1) undef
685 %cmp = icmp eq i32 %c, 0
686 %fabs.x = call float @llvm.fabs.f32(float %x)
687 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
688 %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
689 %add = fmul float %select, %y
690 store volatile float %add, ptr addrspace(1) undef
; fmul user of select(c == 0, -4.0, -fabs(x)); the product is named %add in
; the IR. The checks expect -|X| and -4.0 as operands of v_cndmask_b32_e64,
; conditioned on the captured s_cselect_b64 result, and an unmodified
; v_mul_f32.
694 ; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
695 ; GCN: buffer_load_dword [[X:v[0-9]+]]
696 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
699 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
700 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
701 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
702 define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
703 %x = load volatile float, ptr addrspace(1) undef
704 %y = load volatile float, ptr addrspace(1) undef
705 %cmp = icmp eq i32 %c, 0
706 %fabs.x = call float @llvm.fabs.f32(float %x)
707 %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
708 %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
709 %add = fmul float %select, %y
710 store volatile float %add, ptr addrspace(1) undef
714 ; --------------------------------------------------------------------------------
715 ; Don't fold if fneg can fold into the source
716 ; --------------------------------------------------------------------------------
; The negated value is an fadd of x and 4.0, selected against 2.0 and stored
; directly. The checks expect the negation absorbed by the arithmetic
; (v_sub_f32 of -4.0 minus X) and the select constant left as 2.0. %y is
; loaded but not otherwise used.
718 ; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
719 ; GCN: buffer_load_dword [[X:v[0-9]+]]
720 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
722 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
723 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
724 ; GCN-NEXT: buffer_store_dword [[SELECT]]
725 define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
726 %x = load volatile float, ptr addrspace(1) undef
727 %y = load volatile float, ptr addrspace(1) undef
728 %cmp = icmp eq i32 %c, 0
729 %add = fadd float %x, 4.0
730 %fneg = fsub float -0.0, %add
731 %select = select i1 %cmp, float %fneg, float 2.0
732 store volatile float %select, ptr addrspace(1) undef
; The negated value is an fsub of x and 4.0 (named %add in the IR), selected
; against 2.0. The checks expect the negation absorbed by swapping the
; subtraction (v_sub_f32 of 4.0 minus X) and the select constant left as 2.0.
736 ; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
737 ; GCN: buffer_load_dword [[X:v[0-9]+]]
739 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
740 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
741 ; GCN-NEXT: buffer_store_dword [[SELECT]]
742 define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
743 %x = load volatile float, ptr addrspace(1) undef
744 %cmp = icmp eq i32 %c, 0
745 %add = fsub float %x, 4.0
746 %fneg = fsub float -0.0, %add
747 %select = select i1 %cmp, float %fneg, float 2.0
748 store volatile float %select, ptr addrspace(1) undef
; The negated value is an fmul of x by 4.0, selected against 2.0. The checks
; expect the negation folded into the multiplier (v_mul_f32 by -4.0) and the
; select constant left as 2.0.
752 ; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
753 ; GCN: buffer_load_dword [[X:v[0-9]+]]
755 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
756 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
757 ; GCN-NEXT: buffer_store_dword [[SELECT]]
758 define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
759 %x = load volatile float, ptr addrspace(1) undef
760 %cmp = icmp eq i32 %c, 0
761 %mul = fmul float %x, 4.0
762 %fneg = fsub float -0.0, %mul
763 %select = select i1 %cmp, float %fneg, float 2.0
764 store volatile float %select, ptr addrspace(1) undef
; The negated value is llvm.fma(x, 4.0, z), selected against 2.0. The checks
; expect the negation folded into the v_fma_f32 operands (-4.0 and -Z) and the
; select constant left as 2.0.
768 ; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
769 ; GCN: buffer_load_dword [[X:v[0-9]+]]
770 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
772 ; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
773 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
774 ; GCN-NEXT: buffer_store_dword [[SELECT]]
775 define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
776 %x = load volatile float, ptr addrspace(1) undef
777 %z = load volatile float, ptr addrspace(1) undef
778 %cmp = icmp eq i32 %c, 0
779 %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
780 %fneg = fsub float -0.0, %fma
781 %select = select i1 %cmp, float %fneg, float 2.0
782 store volatile float %select, ptr addrspace(1) undef
; The negated value is llvm.fmuladd(x, 4.0, z), selected against 2.0.
; NOTE(review): in the check lines visible here the v_cndmask_b32 source is
; [[X]] (the first load) and no multiply-add instruction is matched, so this
; presumably relies on the multiply-add result landing in the same register
; as X - confirm, and consider capturing the result explicitly.
786 ; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
787 ; GCN: buffer_load_dword [[X:v[0-9]+]]
788 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
790 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
791 ; GCN-NEXT: buffer_store_dword [[SELECT]]
792 define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
793 %x = load volatile float, ptr addrspace(1) undef
794 %z = load volatile float, ptr addrspace(1) undef
795 %cmp = icmp eq i32 %c, 0
796 %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
797 %fneg = fsub float -0.0, %fmad
798 %select = select i1 %cmp, float %fneg, float 2.0
799 store volatile float %select, ptr addrspace(1) undef
; The negated value is llvm.amdgcn.rcp(x), selected against 2.0. %y is loaded
; but not otherwise used.
; NOTE(review): the checks below already expect the negation as a -X source
; modifier on v_rcp_f32_e64 with the select constant left as 2.0, so the FIXME
; may be stale - confirm before removing it.
803 ; FIXME: This one should fold to rcp
804 ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
805 ; GCN: buffer_load_dword [[X:v[0-9]+]]
807 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]]
808 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc
809 ; GCN-NEXT: buffer_store_dword [[SELECT]]
810 define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
811 %x = load volatile float, ptr addrspace(1) undef
812 %y = load volatile float, ptr addrspace(1) undef
813 %cmp = icmp eq i32 %c, 0
814 %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
815 %fneg = fsub float -0.0, %rcp
816 %select = select i1 %cmp, float %fneg, float 2.0
817 store volatile float %select, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, -|%x|, K) * %y, where K is the float
; 0x3FC45F3060000000 (printed as 0.15915494 in the fiji checks, i.e. 1/(2*pi)).
; Both targets are expected to keep -|x| as source modifiers on v_cndmask.
; They differ in how K reaches the select: the tahiti checks (SI prefix) expect
; it materialized into a VGPR with v_mov_b32 first, while the fiji checks
; (VI prefix) expect it directly as an operand of v_cndmask.
821 ; GCN-LABEL: {{^}}mul_select_negfabs_posk_inv2pi_f32:
822 ; GCN: buffer_load_dword [[X:v[0-9]+]]
823 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
825 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
826 ; GCN-DAG: s_cselect_b64 [[VCC:.*]], -1, 0
828 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
829 ; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
830 ; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
832 ; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
833 ; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
834 define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
835 %x = load volatile float, ptr addrspace(1) undef
836 %y = load volatile float, ptr addrspace(1) undef
837 %cmp = icmp eq i32 %c, 0
838 %fabs.x = call float @llvm.fabs.f32(float %x)
839 %fneg.fabs.x = fneg float %fabs.x
840 %select = select i1 %cmp, float %fneg.fabs.x, float 0x3FC45F3060000000
; Despite its name, %add holds the product of the selected value and %y.
841 %add = fmul float %select, %y
842 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, K, -|%x|) * %y, with K the float
; 0x3FC45F3060000000 - the same operands as the negfabs_posk_inv2pi test, but
; with the select arms swapped.
; The checks expect the scalar compare to be inverted (s_cmp_lg_u32) so that
; v_cndmask keeps the same operand order: constant first, -|x| second.
; As in the unswapped test, the tahiti checks expect K in a VGPR and the fiji
; checks expect it as a direct operand.
846 ; GCN-LABEL: {{^}}mul_select_posk_inv2pi_negfabs_f32:
847 ; GCN: buffer_load_dword [[X:v[0-9]+]]
848 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
850 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
852 ; GCN-DAG: s_cselect_b64 [[VCC:.*]], -1, 0
854 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
855 ; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
856 ; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
859 ; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
860 ; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
861 define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
862 %x = load volatile float, ptr addrspace(1) undef
863 %y = load volatile float, ptr addrspace(1) undef
864 %cmp = icmp eq i32 %c, 0
865 %fabs.x = call float @llvm.fabs.f32(float %x)
866 %fneg.fabs.x = fneg float %fabs.x
867 %select = select i1 %cmp, float 0x3FC45F3060000000, float %fneg.fabs.x
; Despite its name, %add holds the product of the selected value and %y.
868 %add = fmul float %select, %y
869 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, -|%x|, K) * %y, where K is the negative
; constant 0xBFC45F3060000000.
; Under the common prefix (both targets) the checks expect K to be materialized
; with v_mov_b32 (0xbe22f983) and -|x| to stay as source modifiers on
; v_cndmask. The condition operand is matched only by its leading "s".
873 ; GCN-LABEL: {{^}}mul_select_negfabs_negk_inv2pi_f32:
874 ; GCN: buffer_load_dword [[X:v[0-9]+]]
875 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
876 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
877 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s
878 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
879 define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
880 %x = load volatile float, ptr addrspace(1) undef
881 %y = load volatile float, ptr addrspace(1) undef
882 %cmp = icmp eq i32 %c, 0
883 %fabs.x = call float @llvm.fabs.f32(float %x)
884 %fneg.fabs.x = fneg float %fabs.x
885 %select = select i1 %cmp, float %fneg.fabs.x, float 0xBFC45F3060000000
; Despite its name, %add holds the product of the selected value and %y.
886 %add = fmul float %select, %y
887 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, K, -|%x|) * %y, where K is the negative
; constant 0xBFC45F3060000000 and the constant is the first select arm.
; The checks expect K to be materialized with v_mov_b32 (0xbe22f983) and -|x|
; to stay as source modifiers on v_cndmask.
; The SGPR pair written by s_cselect_b64 is captured as a FileCheck variable
; rather than spelled as a fixed physical register, so the test does not depend
; on which pair the register allocator happens to pick.
891 ; GCN-LABEL: {{^}}mul_select_negk_inv2pi_negfabs_f32:
892 ; GCN: buffer_load_dword [[X:v[0-9]+]]
893 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
895 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
897 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
898 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
899 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
900 define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
901 %x = load volatile float, ptr addrspace(1) undef
902 %y = load volatile float, ptr addrspace(1) undef
903 %cmp = icmp eq i32 %c, 0
904 %fabs.x = call float @llvm.fabs.f32(float %x)
905 %fneg.fabs.x = fneg float %fabs.x
906 %select = select i1 %cmp, float 0xBFC45F3060000000, float %fneg.fabs.x
; Despite its name, %add holds the product of the selected value and %y.
907 %add = fmul float %select, %y
908 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, -|%x|, 0.0) * %y.
; The checks expect +0.0 to appear as the literal operand 0 of v_cndmask on
; both targets, with -|x| kept as source modifiers and the condition taken from
; the s_cselect_b64 result.
912 ; GCN-LABEL: {{^}}mul_select_negfabs_posk_0_f32:
913 ; GCN: buffer_load_dword [[X:v[0-9]+]]
914 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
915 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
916 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
917 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
918 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
919 define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
920 %x = load volatile float, ptr addrspace(1) undef
921 %y = load volatile float, ptr addrspace(1) undef
922 %cmp = icmp eq i32 %c, 0
923 %fabs.x = call float @llvm.fabs.f32(float %x)
924 %fneg.fabs.x = fneg float %fabs.x
925 %select = select i1 %cmp, float %fneg.fabs.x, float 0.0
; Despite its name, %add holds the product of the selected value and %y.
926 %add = fmul float %select, %y
927 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, 0.0, -|%x|) * %y - the posk_0 case with
; the select arms swapped.
; The checks expect the scalar compare to be inverted (s_cmp_lg_u32) so that
; v_cndmask keeps the same operand order: literal 0 first, -|x| second.
932 ; GCN-LABEL: {{^}}mul_select_posk_0_negfabs_f32:
933 ; GCN: buffer_load_dword [[X:v[0-9]+]]
934 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
936 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
937 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
938 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
939 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
940 define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
941 %x = load volatile float, ptr addrspace(1) undef
942 %y = load volatile float, ptr addrspace(1) undef
943 %cmp = icmp eq i32 %c, 0
944 %fabs.x = call float @llvm.fabs.f32(float %x)
945 %fneg.fabs.x = fneg float %fabs.x
946 %select = select i1 %cmp, float 0.0, float %fneg.fabs.x
; Despite its name, %add holds the product of the selected value and %y.
947 %add = fmul float %select, %y
948 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, -|%x|, -0.0) * %y.
; The checks expect -0.0 to be produced in a VGPR by v_bfrev_b32 with operand 1
; (captured as NEG0; bit-reversing 1 gives 0x80000000, the f32 encoding of
; -0.0) and -|x| to stay as source modifiers on v_cndmask. The condition is
; matched as any SGPR pair.
952 ; GCN-LABEL: {{^}}mul_select_negfabs_negk_0_f32:
953 ; GCN: buffer_load_dword [[X:v[0-9]+]]
954 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
956 ; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
957 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
958 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
959 define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
960 %x = load volatile float, ptr addrspace(1) undef
961 %y = load volatile float, ptr addrspace(1) undef
962 %cmp = icmp eq i32 %c, 0
963 %fabs.x = call float @llvm.fabs.f32(float %x)
964 %fneg.fabs.x = fneg float %fabs.x
965 %select = select i1 %cmp, float %fneg.fabs.x, float -0.0
; Despite its name, %add holds the product of the selected value and %y.
966 %add = fmul float %select, %y
967 store volatile float %add, ptr addrspace(1) undef
; Pattern under test: select(%c == 0, -0.0, -|%x|) * %y - the negk_0 case with
; the constant as the first select arm.
; The checks expect -0.0 to be produced in a VGPR by v_bfrev_b32 with operand 1
; (captured as NEG0) and -|x| to stay as source modifiers on v_cndmask.
; The SGPR pair written by s_cselect_b64 is captured as a FileCheck variable
; rather than spelled as a fixed physical register, so the test does not depend
; on which pair the register allocator happens to pick.
971 ; GCN-LABEL: {{^}}mul_select_negk_0_negfabs_f32:
972 ; GCN: buffer_load_dword [[X:v[0-9]+]]
973 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
975 ; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
977 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
978 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, [[VCC]]
979 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
980 define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
981 %x = load volatile float, ptr addrspace(1) undef
982 %y = load volatile float, ptr addrspace(1) undef
983 %cmp = icmp eq i32 %c, 0
984 %fabs.x = call float @llvm.fabs.f32(float %x)
985 %fneg.fabs.x = fneg float %fabs.x
986 %select = select i1 %cmp, float -0.0, float %fneg.fabs.x
; Despite its name, %add holds the product of the selected value and %y.
987 %add = fmul float %select, %y
988 store volatile float %add, ptr addrspace(1) undef
; Intrinsic declarations. All of them carry attribute group #1.
993 declare float @llvm.fabs.f32(float) #1
994 declare float @llvm.fma.f32(float, float, float) #1
995 declare float @llvm.fmuladd.f32(float, float, float) #1
996 declare float @llvm.amdgcn.rcp.f32(float) #1
997 declare float @llvm.amdgcn.rcp.legacy(float) #1
998 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
; Attribute groups: #0 is attached to the kernel definitions, #1 to the
; intrinsic declarations above.
1000 attributes #0 = { nounwind }
1001 attributes #1 = { nounwind readnone }