; RUN: llc -mtriple=amdgcn -mcpu=hawaii -start-before=amdgpu-unify-divergent-exit-nodes -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI %s

; --------------------------------------------------------------------------------
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %add, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %add = fadd float %a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; This one asserted with -enable-no-signed-zeros-fp-math
; GCN-LABEL: {{^}}fneg_fadd_0:
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]

; GCN-NSZ-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0, v
; GCN-NSZ: v_cmp_ngt_f32
; GCN-NSZ: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
  %tmp7 = fdiv float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}
; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; function attribute unsafe-fp-math automatically. Combine with the previous test

; GCN-LABEL: {{^}}fneg_fadd_0_nsz:
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fc00000
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], 0, [[A]]
; GCN-NSZ-DAG: v_cmp_ngt_f32_e32 {{.*}}, s{{[0-9]+}}, [[D]]
; GCN-NSZ-DAG: v_cndmask_b32_e64 [[E:v[0-9]+]], -[[D]], v{{[0-9]+}},
; GCN-NSZ-DAG: v_cmp_nlt_f32_e32 {{.*}}, 0
; GCN-NSZ-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, [[C]], 0,
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
  %tmp7 = fdiv afn float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}
; --------------------------------------------------------------------------------
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %mul, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %mul = fmul float %a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; --------------------------------------------------------------------------------
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:

; GCN: v_max_f32_e64 v0, -v0, -v1

define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fneg float %min
  store float %min.fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:

; GCN: v_max_f32_e64 v0, -v0, -v0

define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fneg float %min
  ret float %min.fneg
}
; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:

; GCN: v_max_f32_e64 v0, -v0, -4.0

define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:

; GCN: v_max_f32_e64 v0, -v0, 4.0

define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MIN]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call nnan float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]

; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]

; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}
697 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
698 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
700 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
701 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
702 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
703 ; SI: v_max_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]
705 ; VI: v_min_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[A]], 0.15915494
706 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
708 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
709 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
710 %tid = call i32 @llvm.amdgcn.workitem.id.x()
711 %tid.ext = sext i32 %tid to i64
712 %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
713 %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
714 %a = load volatile double, ptr addrspace(1) %a.gep
715 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
716 %fneg = fsub double -0.000000e+00, %min
717 store double %fneg, ptr addrspace(1) %out.gep
721 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
722 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
724 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
725 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
726 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
727 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]
729 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
730 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
732 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
733 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
734 %tid = call i32 @llvm.amdgcn.workitem.id.x()
735 %tid.ext = sext i32 %tid to i64
736 %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
737 %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
738 %a = load volatile double, ptr addrspace(1) %a.gep
739 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
740 %fneg = fsub double -0.000000e+00, %min
741 store double %fneg, ptr addrspace(1) %out.gep
745 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
747 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
749 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
750 %min = call float @llvm.minnum.f32(float -0.0, float %a)
751 %fneg = fneg float %min
755 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
756 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
757 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
758 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
759 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
760 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
761 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
762 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
763 %tid = call i32 @llvm.amdgcn.workitem.id.x()
764 %tid.ext = sext i32 %tid to i64
765 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
766 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
767 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
768 %a = load volatile float, ptr addrspace(1) %a.gep
769 %b = load volatile float, ptr addrspace(1) %b.gep
770 %min = call float @llvm.minnum.f32(float 0.0, float %a)
771 %fneg = fneg float %min
772 %mul = fmul float %fneg, %b
773 store float %mul, ptr addrspace(1) %out.gep
777 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
778 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
779 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
781 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
783 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
784 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
786 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
787 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
788 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
790 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
791 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
792 %tid = call i32 @llvm.amdgcn.workitem.id.x()
793 %tid.ext = sext i32 %tid to i64
794 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
795 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
796 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
797 %a = load volatile float, ptr addrspace(1) %a.gep
798 %b = load volatile float, ptr addrspace(1) %b.gep
799 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
800 %fneg = fneg float %min
801 %mul = fmul float %fneg, %b
802 store float %mul, ptr addrspace(1) %out.gep
806 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
809 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
810 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
812 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
813 %min = call float @llvm.minnum.f32(float 0.0, float %a)
814 %fneg = fneg float %min
815 %mul = fmul float %fneg, %b
819 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
820 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
821 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
822 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
823 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
824 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
825 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
826 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
827 ; GCN-NEXT: s_waitcnt vmcnt(0)
828 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
829 ; GCN-NEXT: s_waitcnt vmcnt(0)
830 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
831 %tid = call i32 @llvm.amdgcn.workitem.id.x()
832 %tid.ext = sext i32 %tid to i64
833 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
834 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
835 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
836 %a = load volatile float, ptr addrspace(1) %a.gep
837 %b = load volatile float, ptr addrspace(1) %b.gep
838 %min = call float @llvm.minnum.f32(float %a, float %b)
839 %fneg = fneg float %min
840 %use1 = fmul float %min, 4.0
841 store volatile float %fneg, ptr addrspace(1) %out
842 store volatile float %use1, ptr addrspace(1) %out
846 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
849 ; GCN: v_max_f32_e64 v0, -v0, -v1
850 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
852 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
853 %min = call float @llvm.minnum.f32(float %a, float %b)
854 %fneg = fneg float %min
855 %use1 = fmul float %min, 4.0
856 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
857 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
858 ret <2 x float> %ins1
861 ; --------------------------------------------------------------------------------
863 ; --------------------------------------------------------------------------------
866 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
867 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
868 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
869 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
870 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
871 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
872 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
873 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
874 %tid = call i32 @llvm.amdgcn.workitem.id.x()
875 %tid.ext = sext i32 %tid to i64
876 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
877 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
878 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
879 %a = load volatile float, ptr addrspace(1) %a.gep
880 %b = load volatile float, ptr addrspace(1) %b.gep
881 %max = call float @llvm.maxnum.f32(float %a, float %b)
882 %fneg = fneg float %max
883 store float %fneg, ptr addrspace(1) %out.gep
887 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
890 ; GCN: v_min_f32_e64 v0, -v0, -v1
892 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
893 %max = call float @llvm.maxnum.f32(float %a, float %b)
894 %fneg = fneg float %max
898 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
899 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
900 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
901 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
902 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
903 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
904 %tid = call i32 @llvm.amdgcn.workitem.id.x()
905 %tid.ext = sext i32 %tid to i64
906 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
907 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
908 %a = load volatile float, ptr addrspace(1) %a.gep
909 %max = call float @llvm.maxnum.f32(float %a, float %a)
910 %max.fneg = fneg float %max
911 store float %max.fneg, ptr addrspace(1) %out.gep
915 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
917 ; GCN: v_min_f32_e64 v0, -v0, -v0
919 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
920 %max = call float @llvm.maxnum.f32(float %a, float %a)
921 %max.fneg = fneg float %max
925 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
926 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
927 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
928 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
929 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
930 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
931 %tid = call i32 @llvm.amdgcn.workitem.id.x()
932 %tid.ext = sext i32 %tid to i64
933 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
934 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
935 %a = load volatile float, ptr addrspace(1) %a.gep
936 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
937 %fneg = fneg float %max
938 store float %fneg, ptr addrspace(1) %out.gep
942 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
944 ; GCN: v_min_f32_e64 v0, -v0, -4.0
946 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
947 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
948 %fneg = fneg float %max
952 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
953 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
954 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
955 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
956 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
957 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
958 %tid = call i32 @llvm.amdgcn.workitem.id.x()
959 %tid.ext = sext i32 %tid to i64
960 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
961 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
962 %a = load volatile float, ptr addrspace(1) %a.gep
963 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
964 %fneg = fneg float %max
965 store float %fneg, ptr addrspace(1) %out.gep
969 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
971 ; GCN: v_min_f32_e64 v0, -v0, 4.0
973 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
974 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
975 %fneg = fneg float %max
979 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
980 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
982 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
983 ; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
984 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
985 define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
986 %tid = call i32 @llvm.amdgcn.workitem.id.x()
987 %tid.ext = sext i32 %tid to i64
988 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
989 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
990 %a = load volatile float, ptr addrspace(1) %a.gep
991 %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a)
992 %fneg = fneg float %max
993 store float %fneg, ptr addrspace(1) %out.gep
997 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
998 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
999 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
1000 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
1001 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1002 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
1003 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1004 %tid.ext = sext i32 %tid to i64
1005 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1006 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1007 %a = load volatile float, ptr addrspace(1) %a.gep
1008 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1009 %fneg = fneg float %max
1010 store float %fneg, ptr addrspace(1) %out.gep
1014 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
1016 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
1017 ; GCN-NEXT: ; return
1018 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
1019 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1020 %fneg = fneg float %max
1024 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
1025 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1026 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1027 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
1028 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
1029 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
1030 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1031 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
1032 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1033 %tid.ext = sext i32 %tid to i64
1034 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1035 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1036 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1037 %a = load volatile float, ptr addrspace(1) %a.gep
1038 %b = load volatile float, ptr addrspace(1) %b.gep
1039 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1040 %fneg = fneg float %max
1041 %mul = fmul float %fneg, %b
1042 store float %mul, ptr addrspace(1) %out.gep
1046 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1049 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
1050 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
1051 ; GCN-NEXT: ; return
1052 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
1053 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1054 %fneg = fneg float %max
1055 %mul = fmul float %fneg, %b
1059 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1060 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1061 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1062 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
1063 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1064 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1065 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1066 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1067 ; GCN-NEXT: s_waitcnt vmcnt(0)
1068 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1069 ; GCN-NEXT: s_waitcnt vmcnt(0)
1070 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
1071 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1072 %tid.ext = sext i32 %tid to i64
1073 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1074 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1075 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1076 %a = load volatile float, ptr addrspace(1) %a.gep
1077 %b = load volatile float, ptr addrspace(1) %b.gep
1078 %max = call float @llvm.maxnum.f32(float %a, float %b)
1079 %fneg = fneg float %max
1080 %use1 = fmul float %max, 4.0
1081 store volatile float %fneg, ptr addrspace(1) %out
1082 store volatile float %use1, ptr addrspace(1) %out
1086 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1089 ; GCN: v_min_f32_e64 v0, -v0, -v1
1090 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1091 ; GCN-NEXT: ; return
1092 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1093 %max = call float @llvm.maxnum.f32(float %a, float %b)
1094 %fneg = fneg float %max
1095 %use1 = fmul float %max, 4.0
1096 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1097 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1098 ret <2 x float> %ins1
1101 ; --------------------------------------------------------------------------------
1103 ; --------------------------------------------------------------------------------
1105 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1106 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1107 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1108 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1110 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1111 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1113 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1114 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1115 define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1116 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1117 %tid.ext = sext i32 %tid to i64
1118 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1119 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1120 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1121 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1122 %a = load volatile float, ptr addrspace(1) %a.gep
1123 %b = load volatile float, ptr addrspace(1) %b.gep
1124 %c = load volatile float, ptr addrspace(1) %c.gep
1125 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1126 %fneg = fneg float %fma
1127 store float %fneg, ptr addrspace(1) %out.gep
1131 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1132 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1133 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1134 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1135 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1136 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1137 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1138 ; GCN-NEXT: s_waitcnt vmcnt(0)
1139 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1140 ; GCN-NEXT: s_waitcnt vmcnt(0)
1141 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1142 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1143 %tid.ext = sext i32 %tid to i64
1144 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1145 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1146 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1147 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1148 %a = load volatile float, ptr addrspace(1) %a.gep
1149 %b = load volatile float, ptr addrspace(1) %b.gep
1150 %c = load volatile float, ptr addrspace(1) %c.gep
1151 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1152 %fneg = fneg float %fma
1153 store volatile float %fneg, ptr addrspace(1) %out
1154 store volatile float %fma, ptr addrspace(1) %out
1158 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1159 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1160 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1161 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1163 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1165 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1167 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1168 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1170 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1171 ; GCN-NEXT: s_waitcnt vmcnt(0)
1172 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1173 ; GCN-NEXT: s_waitcnt vmcnt(0)
1174 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1175 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1176 %tid.ext = sext i32 %tid to i64
1177 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1178 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1179 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1180 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1181 %a = load volatile float, ptr addrspace(1) %a.gep
1182 %b = load volatile float, ptr addrspace(1) %b.gep
1183 %c = load volatile float, ptr addrspace(1) %c.gep
1184 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1185 %fneg = fneg float %fma
1186 %use1 = fmul float %fma, 4.0
1187 store volatile float %fneg, ptr addrspace(1) %out
1188 store volatile float %use1, ptr addrspace(1) %out
1192 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1193 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1194 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1195 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1197 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1198 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1200 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1201 ; GCN-NSZ-NOT: [[FMA]]
1202 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1203 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1204 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1205 %tid.ext = sext i32 %tid to i64
1206 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1207 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1208 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1209 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1210 %a = load volatile float, ptr addrspace(1) %a.gep
1211 %b = load volatile float, ptr addrspace(1) %b.gep
1212 %c = load volatile float, ptr addrspace(1) %c.gep
1213 %fneg.a = fneg float %a
1214 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1215 %fneg = fneg float %fma
1216 store volatile float %fneg, ptr addrspace(1) %out
1220 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1221 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1222 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1223 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1225 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1226 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1228 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1229 ; GCN-NSZ-NOT: [[FMA]]
1230 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1231 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1232 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1233 %tid.ext = sext i32 %tid to i64
1234 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1235 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1236 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1237 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1238 %a = load volatile float, ptr addrspace(1) %a.gep
1239 %b = load volatile float, ptr addrspace(1) %b.gep
1240 %c = load volatile float, ptr addrspace(1) %c.gep
1241 %fneg.b = fneg float %b
1242 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1243 %fneg = fneg float %fma
1244 store volatile float %fneg, ptr addrspace(1) %out
1248 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1249 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1250 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1251 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1253 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1254 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1256 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1257 ; GCN-NSZ-NOT: [[FMA]]
1258 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1259 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1260 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1261 %tid.ext = sext i32 %tid to i64
1262 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1263 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1264 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1265 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1266 %a = load volatile float, ptr addrspace(1) %a.gep
1267 %b = load volatile float, ptr addrspace(1) %b.gep
1268 %c = load volatile float, ptr addrspace(1) %c.gep
1269 %fneg.a = fneg float %a
1270 %fneg.b = fneg float %b
1271 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1272 %fneg = fneg float %fma
1273 store volatile float %fneg, ptr addrspace(1) %out
1277 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1278 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1279 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1280 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1282 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1283 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1285 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1286 ; GCN-NSZ-NOT: [[FMA]]
1287 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1288 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1289 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1290 %tid.ext = sext i32 %tid to i64
1291 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1292 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1293 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1294 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1295 %a = load volatile float, ptr addrspace(1) %a.gep
1296 %b = load volatile float, ptr addrspace(1) %b.gep
1297 %c = load volatile float, ptr addrspace(1) %c.gep
1298 %fneg.a = fneg float %a
1299 %fneg.c = fneg float %c
1300 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1301 %fneg = fneg float %fma
1302 store volatile float %fneg, ptr addrspace(1) %out
1306 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1307 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1308 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1309 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1311 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1312 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1314 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1315 ; GCN-NSZ-NOT: [[FMA]]
1316 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1317 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
1318 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1319 %tid.ext = sext i32 %tid to i64
1320 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1321 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1322 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
1323 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1324 %a = load volatile float, ptr addrspace(1) %a.gep
1325 %b = load volatile float, ptr addrspace(1) %b.gep
1326 %c = load volatile float, ptr addrspace(1) %c.gep
1327 %fneg.c = fneg float %c
1328 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1329 %fneg = fneg float %fma
1330 store volatile float %fneg, ptr addrspace(1) %out
1334 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1335 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1336 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1337 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1339 ; GCN-SAFE: v_xor_b32
1340 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1341 ; GCN-SAFE: v_xor_b32
1343 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1344 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1346 ; GCN-NSZ-NOT: [[FMA]]
1347 ; GCN-NSZ-NOT: [[NEG_A]]
1348 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1349 ; GCN-NSZ-NOT: [[NEG_A]]
1350 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; fneg(fma(-a, b, c)) where -a also has a store use; checked by the GCN lines above.
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}
1369 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1370 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1371 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1372 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1374 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1375 ; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]]
1376 ; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1378 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1379 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1380 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
1381 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1382 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; fneg(fma(-a, b, c)) where -a also feeds a multiply by the scalar arg %d.
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
1402 ; --------------------------------------------------------------------------------
1404 ; --------------------------------------------------------------------------------
1406 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1407 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1408 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1409 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1411 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1412 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1414 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1415 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of fmuladd (fmad); single use, so the negation folds into the mad operands.
define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1432 ; GCN-LABEL: {{^}}v_fneg_fmad_v4f32:
1434 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
1435 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
1436 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
1437 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; Vector (v4f32) variant of fneg-of-fmuladd.
define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile <4 x float>, ptr addrspace(1) %a.gep
  %b = load volatile <4 x float>, ptr addrspace(1) %b.gep
  %c = load volatile <4 x float>, ptr addrspace(1) %c.gep
  %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  %fneg = fneg <4 x float> %fma
  store <4 x float> %fneg, ptr addrspace(1) %out.gep
  ret void
}
1454 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1455 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1456 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1457 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1459 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1460 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1461 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1463 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
1464 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1466 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1467 ; GCN-NEXT: s_waitcnt vmcnt(0)
1468 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1469 ; GCN-NEXT: s_waitcnt vmcnt(0)
; fneg of fmuladd where the fmad result also has a foldable fmul use.
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
1488 ; --------------------------------------------------------------------------------
1490 ; --------------------------------------------------------------------------------
1492 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1493 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1494 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1495 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of fpext f32->f64; the negation folds into the convert's source modifier.
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}
1508 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1509 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1510 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1511 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(fpext(fneg x)); the two negations cancel, leaving a plain convert.
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}
1525 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1526 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1527 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1528 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1529 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1530 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
; As above, but the inner fneg also has a store use, so it must be materialized.
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}
1545 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1546 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1547 ; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
1548 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1549 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
1550 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
; fpext has two uses (negated and plain), so the fneg becomes a sign-bit xor of the high half.
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile double %fpext, ptr addrspace(1) undef
  ret void
}
1564 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1565 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1566 ; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
1567 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1568 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
1569 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
1570 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; fpext with a negated use and a foldable fmul use.
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  %mul = fmul double %fpext, 4.0
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile double %mul, ptr addrspace(1) %out.gep
  ret void
}
1585 ; FIXME: Source modifiers not folded for f16->f32
1586 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; f16->f32 variant; per the FIXME above, source modifiers are not yet folded here.
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fpext, ptr addrspace(1) %out.gep
  ret void
}
1600 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
; f16->f32 fpext with a negated use and a foldable fmul use.
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  %mul = fmul float %fpext, 4.0
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %mul, ptr addrspace(1) %out.gep
  ret void
}
1615 ; --------------------------------------------------------------------------------
1617 ; --------------------------------------------------------------------------------
1619 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1620 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1621 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1622 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of fptrunc f64->f32 folds into the convert's source modifier.
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1635 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1636 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1637 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1638 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(fptrunc(fneg x)); the negations cancel, leaving a plain convert.
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1652 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1653 ; GCN: {{buffer|flat}}_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
1654 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v[[[A_LO]]:[[A_HI]]]
1655 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1656 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1657 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
; As above, but the negated f64 is also stored, so its sign flip must be materialized.
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile double %fneg.a, ptr addrspace(1) undef
  ret void
}
1672 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1673 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1674 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1675 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s[
1677 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1678 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
; The negated f64 feeds both the round and a multiply by %c.
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, double %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  %use1 = fmul double %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile double %use1, ptr addrspace(1) undef
  ret void
}
1694 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1695 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1696 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1697 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of fptrunc f32->f16 folds into the convert's source modifier.
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpround = fptrunc float %a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}
1710 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1711 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1712 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1713 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(fptrunc(fneg x)) for f32->f16; negations cancel.
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}
1727 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1728 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1729 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1730 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1731 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1732 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
; fptrunc result has both a negated and a plain use; the fneg becomes a sign-bit xor.
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fpround, ptr addrspace(1) %out.gep
  ret void
}
1746 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1747 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1748 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1749 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1750 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1751 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; The negated f32 is also stored, so its sign flip must be materialized.
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}
1766 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1767 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1768 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1769 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1770 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1771 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
; The negated f32 feeds both the round and a multiply by %c.
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, ptr addrspace(1) %out.gep
  store volatile float %use1, ptr addrspace(1) undef
  ret void
}
1787 ; --------------------------------------------------------------------------------
1789 ; --------------------------------------------------------------------------------
1791 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1792 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1793 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1794 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of amdgcn.rcp folds into the rcp's source modifier.
define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fneg float %rcp
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1807 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1808 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1809 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1810 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(rcp(fneg x)); the negations cancel into a plain rcp.
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1824 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1825 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1826 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1827 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1828 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1829 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; As above, but the inner fneg is also stored, so the sign flip is materialized.
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}
1844 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1845 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1846 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1847 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1848 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1849 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; The inner fneg feeds both the rcp and a multiply by %c.
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %use1, ptr addrspace(1) undef
  ret void
}
1865 ; --------------------------------------------------------------------------------
1867 ; --------------------------------------------------------------------------------
1869 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1870 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1871 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1872 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1873 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg of amdgcn.fmul.legacy folds into a source modifier on the mul.
define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}
1888 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1889 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1890 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1891 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1892 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1893 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1894 ; GCN-NEXT: s_waitcnt vmcnt(0)
1895 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1896 ; GCN-NEXT: s_waitcnt vmcnt(0)
; Legacy mul has a plain store use as well, so the fneg stays a sign-bit xor.
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %mul, ptr addrspace(1) %out
  ret void
}
1912 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1913 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1914 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1915 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1916 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1917 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1918 ; GCN-NEXT: s_waitcnt vmcnt(0)
1919 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1920 ; GCN-NEXT: s_waitcnt vmcnt(0)
; Legacy mul has both a negated use and a second foldable legacy-mul use.
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
1937 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1938 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1939 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1940 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1941 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; fneg(mul.legacy(-a, b)); the negations cancel into a plain legacy mul.
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
1957 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1958 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1959 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1960 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1961 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; fneg(mul.legacy(a, -b)); the negations cancel into a plain legacy mul.
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}
1977 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1978 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1979 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1980 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1981 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1982 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
1983 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1984 %tid.ext = sext i32 %tid to i64
1985 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
1986 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
1987 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
1988 %a = load volatile float, ptr addrspace(1) %a.gep
1989 %b = load volatile float, ptr addrspace(1) %b.gep
1990 %fneg.a = fneg float %a
1991 %fneg.b = fneg float %b
1992 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1993 %fneg = fneg float %mul
1994 store volatile float %fneg, ptr addrspace(1) %out
1998 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1999 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2000 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2001 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
2002 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
2003 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
2004 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
2005 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
2006 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2007 %tid.ext = sext i32 %tid to i64
2008 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2009 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2010 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2011 %a = load volatile float, ptr addrspace(1) %a.gep
2012 %b = load volatile float, ptr addrspace(1) %b.gep
2013 %fneg.a = fneg float %a
2014 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2015 %fneg = fneg float %mul
2016 store volatile float %fneg, ptr addrspace(1) %out
2017 store volatile float %fneg.a, ptr addrspace(1) %out
2021 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
2022 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2023 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2024 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
2025 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
2026 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
2027 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2028 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
2029 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2030 %tid.ext = sext i32 %tid to i64
2031 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2032 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2033 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2034 %a = load volatile float, ptr addrspace(1) %a.gep
2035 %b = load volatile float, ptr addrspace(1) %b.gep
2036 %fneg.a = fneg float %a
2037 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2038 %fneg = fneg float %mul
2039 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
2040 store volatile float %fneg, ptr addrspace(1) %out
2041 store volatile float %use1, ptr addrspace(1) %out
2045 ; --------------------------------------------------------------------------------
2047 ; --------------------------------------------------------------------------------
2049 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
2050 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2051 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
2052 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
2053 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
2054 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2055 define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2056 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2057 %tid.ext = sext i32 %tid to i64
2058 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2059 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2060 %a = load volatile float, ptr addrspace(1) %a.gep
2061 %sin = call float @llvm.sin.f32(float %a)
2062 %fneg = fneg float %sin
2063 store float %fneg, ptr addrspace(1) %out.gep
2067 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
2068 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2069 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2070 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2071 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2072 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2073 %tid.ext = sext i32 %tid to i64
2074 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2075 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2076 %a = load volatile float, ptr addrspace(1) %a.gep
2077 %sin = call float @llvm.amdgcn.sin.f32(float %a)
2078 %fneg = fneg float %sin
2079 store float %fneg, ptr addrspace(1) %out.gep
2083 ; --------------------------------------------------------------------------------
2085 ; --------------------------------------------------------------------------------
2087 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
2088 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2089 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2090 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2091 define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2092 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2093 %tid.ext = sext i32 %tid to i64
2094 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2095 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2096 %a = load volatile float, ptr addrspace(1) %a.gep
2097 %trunc = call float @llvm.trunc.f32(float %a)
2098 %fneg = fneg float %trunc
2099 store float %fneg, ptr addrspace(1) %out.gep
2103 ; --------------------------------------------------------------------------------
2105 ; --------------------------------------------------------------------------------
2107 ; GCN-LABEL: {{^}}v_fneg_round_f32:
2108 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2109 ; GCN: v_trunc_f32_e32
2110 ; GCN: v_sub_f32_e32
2111 ; GCN: v_cndmask_b32
2113 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
2114 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
2116 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
2117 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2118 define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2119 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2120 %tid.ext = sext i32 %tid to i64
2121 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2122 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2123 %a = load volatile float, ptr addrspace(1) %a.gep
2124 %round = call float @llvm.round.f32(float %a)
2125 %fneg = fneg float %round
2126 store float %fneg, ptr addrspace(1) %out.gep
2130 ; --------------------------------------------------------------------------------
2132 ; --------------------------------------------------------------------------------
2134 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
2135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2136 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2137 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2138 define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2139 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2140 %tid.ext = sext i32 %tid to i64
2141 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2142 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2143 %a = load volatile float, ptr addrspace(1) %a.gep
2144 %rint = call float @llvm.rint.f32(float %a)
2145 %fneg = fneg float %rint
2146 store float %fneg, ptr addrspace(1) %out.gep
2150 ; --------------------------------------------------------------------------------
2152 ; --------------------------------------------------------------------------------
2154 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
2155 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2156 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2157 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2158 define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2159 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2160 %tid.ext = sext i32 %tid to i64
2161 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2162 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2163 %a = load volatile float, ptr addrspace(1) %a.gep
2164 %nearbyint = call float @llvm.nearbyint.f32(float %a)
2165 %fneg = fneg float %nearbyint
2166 store float %fneg, ptr addrspace(1) %out.gep
2170 ; --------------------------------------------------------------------------------
2171 ; fcanonicalize tests
2172 ; --------------------------------------------------------------------------------
2174 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
2175 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2176 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
2177 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2178 define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
2179 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2180 %tid.ext = sext i32 %tid to i64
2181 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2182 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2183 %a = load volatile float, ptr addrspace(1) %a.gep
2184 %trunc = call float @llvm.canonicalize.f32(float %a)
2185 %fneg = fneg float %trunc
2186 store float %fneg, ptr addrspace(1) %out.gep
2190 ; --------------------------------------------------------------------------------
2192 ; --------------------------------------------------------------------------------
2194 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
2195 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2196 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2197 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2198 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2199 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2200 define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
2201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2202 %tid.ext = sext i32 %tid to i64
2203 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2204 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2205 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2206 %a = load volatile float, ptr addrspace(1) %a.gep
2207 %b = load volatile float, ptr addrspace(1) %b.gep
2208 %mul = fmul float %a, %b
2209 %fneg = fneg float %mul
2210 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2211 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2212 store volatile float %intrp0, ptr addrspace(1) %out.gep
2213 store volatile float %intrp1, ptr addrspace(1) %out.gep
2217 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
2218 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2219 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2220 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2221 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2222 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2223 define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
2224 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2225 %tid.ext = sext i32 %tid to i64
2226 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2227 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2228 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2229 %a = load volatile float, ptr addrspace(1) %a.gep
2230 %b = load volatile float, ptr addrspace(1) %b.gep
2231 %mul = fmul float %a, %b
2232 %fneg = fneg float %mul
2233 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2234 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2235 store volatile float %intrp0, ptr addrspace(1) %out.gep
2236 store volatile float %intrp1, ptr addrspace(1) %out.gep
2240 ; --------------------------------------------------------------------------------
2242 ; --------------------------------------------------------------------------------
2244 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2245 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2246 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2247 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2248 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2249 ; GCN: s_cbranch_scc0
2251 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2254 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2255 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2256 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2258 define amdgpu_kernel void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
2259 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2260 %tid.ext = sext i32 %tid to i64
2261 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2262 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2263 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2264 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2265 %a = load volatile float, ptr addrspace(1) %a.gep
2266 %b = load volatile float, ptr addrspace(1) %b.gep
2267 %c = load volatile float, ptr addrspace(1) %c.gep
2268 %mul = fmul float %a, %b
2269 %fneg = fneg float %mul
2270 %cmp0 = icmp eq i32 %d, 0
2271 br i1 %cmp0, label %if, label %endif
2274 %mul1 = fmul float %fneg, %c
2275 store volatile float %mul1, ptr addrspace(1) %out.gep
2279 store volatile float %mul, ptr addrspace(1) %out.gep
2283 ; --------------------------------------------------------------------------------
2285 ; --------------------------------------------------------------------------------
2287 ; Can't fold into use, so should fold into source
2288 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2289 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2290 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2291 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2292 ; GCN: ; use [[MUL]]
2293 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2294 define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
2295 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2296 %tid.ext = sext i32 %tid to i64
2297 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2298 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2299 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2300 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2301 %a = load volatile float, ptr addrspace(1) %a.gep
2302 %b = load volatile float, ptr addrspace(1) %b.gep
2303 %c = load volatile float, ptr addrspace(1) %c.gep
2304 %mul = fmul float %a, %b
2305 %fneg = fneg float %mul
2306 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2307 store volatile float %fneg, ptr addrspace(1) %out.gep
2311 ; --------------------------------------------------------------------------------
2313 ; --------------------------------------------------------------------------------
2315 ; Can't fold into use, so should fold into source
2316 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2317 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2318 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2319 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2320 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2321 ; GCN: ; use [[NEG]]
2322 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2323 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
2324 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2325 %tid.ext = sext i32 %tid to i64
2326 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2327 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2328 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2329 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2330 %a = load volatile float, ptr addrspace(1) %a.gep
2331 %b = load volatile float, ptr addrspace(1) %b.gep
2332 %c = load volatile float, ptr addrspace(1) %c.gep
2333 %mul = fmul float %a, %b
2334 %fneg = fneg float %mul
2335 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2336 store volatile float %mul, ptr addrspace(1) %out.gep
2340 ; --------------------------------------------------------------------------------
2341 ; code size regression tests
2342 ; --------------------------------------------------------------------------------
2344 ; There are multiple users of the fneg that must use a VOP3
2345 ; instruction, so there is no penalty
2346 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2347 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2348 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2349 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2351 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2352 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2354 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2355 ; GCN-NEXT: s_waitcnt vmcnt(0)
2356 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
2357 ; GCN-NEXT: s_waitcnt vmcnt(0)
2358 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
2359 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2360 %tid.ext = sext i32 %tid to i64
2361 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2362 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2363 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2364 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2365 %a = load volatile float, ptr addrspace(1) %a.gep
2366 %b = load volatile float, ptr addrspace(1) %b.gep
2367 %c = load volatile float, ptr addrspace(1) %c.gep
2369 %fneg.a = fneg float %a
2370 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2371 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2373 store volatile float %fma0, ptr addrspace(1) %out
2374 store volatile float %fma1, ptr addrspace(1) %out
2378 ; There are multiple users, but both require using a larger encoding
2381 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2382 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2383 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2384 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2386 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2387 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2388 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2389 ; GCN-NEXT: s_waitcnt vmcnt(0)
2390 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2391 ; GCN-NEXT: s_waitcnt vmcnt(0)
2392 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
2393 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2394 %tid.ext = sext i32 %tid to i64
2395 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2396 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2397 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2398 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2399 %a = load volatile float, ptr addrspace(1) %a.gep
2400 %b = load volatile float, ptr addrspace(1) %b.gep
2401 %c = load volatile float, ptr addrspace(1) %c.gep
2403 %fneg.a = fneg float %a
2404 %mul0 = fmul float %fneg.a, %b
2405 %mul1 = fmul float %fneg.a, %c
2407 store volatile float %mul0, ptr addrspace(1) %out
2408 store volatile float %mul1, ptr addrspace(1) %out
2412 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2413 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2414 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2415 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2416 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2418 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2419 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2421 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2422 ; GCN-NEXT: s_waitcnt vmcnt(0)
2423 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2424 ; GCN-NEXT: s_waitcnt vmcnt(0)
2425 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
2426 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2427 %tid.ext = sext i32 %tid to i64
2428 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2429 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2430 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2431 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2432 %a = load volatile float, ptr addrspace(1) %a.gep
2433 %b = load volatile float, ptr addrspace(1) %b.gep
2434 %c = load volatile float, ptr addrspace(1) %c.gep
2436 %fneg.a = fneg float %a
2437 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2438 %mul1 = fmul float %fneg.a, %c
2440 store volatile float %fma0, ptr addrspace(1) %out
2441 store volatile float %mul1, ptr addrspace(1) %out
2445 ; The use of the fneg requires a code size increase, but folding into
2446 ; the source does not
2448 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2449 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2450 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2451 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2452 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2454 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2455 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2456 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2458 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2459 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2460 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2462 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2463 ; GCN-NEXT: s_waitcnt vmcnt(0)
2464 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
2465 ; GCN-NEXT: s_waitcnt vmcnt(0)
2466 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
2467 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2468 %tid.ext = sext i32 %tid to i64
2469 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2470 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2471 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2472 %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
2473 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2474 %a = load volatile float, ptr addrspace(1) %a.gep
2475 %b = load volatile float, ptr addrspace(1) %b.gep
2476 %c = load volatile float, ptr addrspace(1) %c.gep
2477 %d = load volatile float, ptr addrspace(1) %d.gep
2479 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2480 %fneg.fma0 = fneg float %fma0
2481 %mul1 = fmul float %fneg.fma0, %c
2482 %mul2 = fmul float %fneg.fma0, %d
2484 store volatile float %mul1, ptr addrspace(1) %out
2485 store volatile float %mul2, ptr addrspace(1) %out
2489 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2490 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2491 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2492 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2493 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2495 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2496 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2497 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2499 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2500 ; GCN-NEXT: s_waitcnt vmcnt(0)
2501 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2502 ; GCN-NEXT: s_waitcnt vmcnt(0)
2503 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
2504 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2505 %tid.ext = sext i32 %tid to i64
2506 %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
2507 %b.gep = getelementptr inbounds double, ptr addrspace(1) %b.ptr, i64 %tid.ext
2508 %c.gep = getelementptr inbounds double, ptr addrspace(1) %c.ptr, i64 %tid.ext
2509 %d.gep = getelementptr inbounds double, ptr addrspace(1) %d.ptr, i64 %tid.ext
2510 %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
2511 %a = load volatile double, ptr addrspace(1) %a.gep
2512 %b = load volatile double, ptr addrspace(1) %b.gep
2513 %c = load volatile double, ptr addrspace(1) %c.gep
2514 %d = load volatile double, ptr addrspace(1) %d.gep
2516 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2517 %fneg.fma0 = fsub double -0.0, %fma0
2518 %mul1 = fmul double %fneg.fma0, %c
2519 %mul2 = fmul double %fneg.fma0, %d
2521 store volatile double %mul1, ptr addrspace(1) %out
2522 store volatile double %mul2, ptr addrspace(1) %out
2526 ; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.
2529 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2530 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2531 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2532 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2533 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2534 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2535 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2536 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
2537 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2538 %tid.ext = sext i32 %tid to i64
2539 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2540 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2541 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2542 %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
2543 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2544 %a = load volatile float, ptr addrspace(1) %a.gep
2545 %b = load volatile float, ptr addrspace(1) %b.gep
2546 %c = load volatile float, ptr addrspace(1) %c.gep
2547 %d = load volatile float, ptr addrspace(1) %d.gep
2549 %trunc.a = call float @llvm.trunc.f32(float %a)
2550 %trunc.fneg.a = fneg float %trunc.a
2551 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2552 store volatile float %fma0, ptr addrspace(1) %out
2556 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2557 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2558 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2559 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2560 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2561 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2562 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2563 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2564 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2565 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2566 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
2567 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2568 %tid.ext = sext i32 %tid to i64
2569 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2570 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
2571 %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
2572 %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
2573 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2574 %a = load volatile float, ptr addrspace(1) %a.gep
2575 %b = load volatile float, ptr addrspace(1) %b.gep
2576 %c = load volatile float, ptr addrspace(1) %c.gep
2577 %d = load volatile float, ptr addrspace(1) %d.gep
2579 %trunc.a = call float @llvm.trunc.f32(float %a)
2580 %trunc.fneg.a = fneg float %trunc.a
2581 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2582 %mul1 = fmul float %trunc.a, %d
2583 store volatile float %fma0, ptr addrspace(1) %out
2584 store volatile float %mul1, ptr addrspace(1) %out
2588 ; The AMDGPU combine to pull fneg into the FMA operands was being
2589 ; undone by the generic combine to pull the fneg out of the fma if
2590 ; !isFNegFree. We were reporting false for v2f32 even though it will
2591 ; be split into f32 where it will be free.
2592 ; GCN-LABEL: {{^}}fneg_fma_fneg_dagcombine_loop:
2593 ; GCN: s_brev_b32 [[NEGZERO:s[0-9]+]], 1{{$}}
2594 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], v2, -v4, [[NEGZERO]]
2595 ; GCN-DAG: v_fma_f32 [[FMA1:v[0-9]+]], v3, -v5, [[NEGZERO]]
2596 ; GCN-DAG: v_sub_f32_e32 [[SUB0:v[0-9]+]], [[FMA0]], v0
2597 ; GCN-DAG: v_sub_f32_e32 [[SUB1:v[0-9]+]], [[FMA1]], v1
2598 ; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4
2599 ; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5
2601 define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
2603 %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
2604 %i4 = fadd fast <2 x float> %i3, %arg
2605 %i5 = fneg <2 x float> %i4
2606 %i6 = fmul fast <2 x float> %i5, %arg2
2610 ; This expects denormal flushing, so can't turn this fmul into fneg
2611 ; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
2613 ; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
2614 define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2615 %mul = fmul float %x, -1.0
2616 %add = fmul nnan float %mul, %y
2620 ; It's legal to turn this fmul into an fneg since denormals are
2621 ; preserved and we know an snan can't happen from the flag.
2622 ; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
2623 ; GCN: v_mul_f32_e64 v0, -v0, v1
2624 ; GCN-NEXT: s_setpc_b64
2625 define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
2626 %mul = fmul nnan float %x, -1.0
2627 %add = fmul float %mul, %y
2631 ; know the source can't be an snan
2632 ; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
2634 ; GCN-NEXT: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
2635 ; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
2636 ; GCN-NEXT: s_setpc_b64
2637 define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
2638 %canonical = fmul float %x, %x
2639 %mul = fmul float %canonical, -1.0
2640 %add = fmul float %mul, %y
2644 ; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
2646 ; GCN-NEXT: v_mul_f32_e32 [[TMP:v[0-9]+]], 1.0, v0
2647 ; GCN-NEXT: v_mul_f32_e64 v0, -[[TMP]], v1
2648 define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2649 %quiet = call float @llvm.canonicalize.f32(float %x)
2650 %mul = fmul float %quiet, -1.0
2651 %add = fmul float %mul, %y
2655 ; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f32:
2656 ; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
2657 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2658 ; GCN-NEXT: v_sub_f32_e32 v0, v3, v0
2659 ; GCN-NEXT: s_setpc_b64
2660 define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
2661 %cmp = icmp eq i32 %arg0, 0
2662 %neg.x = fneg float %x
2663 %neg.y = fneg float %y
2664 %select = select i1 %cmp, float %neg.x, float %neg.y
2665 %add = fadd float %select, %z
2669 ; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f64:
2670 ; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
2671 ; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
2672 ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2673 ; GCN-NEXT: v_add_f64 v[0:1], v[5:6], -v[1:2]
2674 ; GCN-NEXT: s_setpc_b64
2675 define double @fadd_select_fneg_fneg_f64(i32 %arg0, double %x, double %y, double %z) {
2676 %cmp = icmp eq i32 %arg0, 0
2677 %neg.x = fneg double %x
2678 %neg.y = fneg double %y
2679 %select = select i1 %cmp, double %neg.x, double %neg.y
2680 %add = fadd double %select, %z
2684 ; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f16:
2692 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc
2693 ; SI-NEXT: v_sub_f32_e32
2694 ; SI-NEXT: s_setpc_b64
2696 ; VI: v_cmp_eq_u32_e32 vcc, 0, v0
2697 ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2698 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
2699 ; VI-NEXT: s_setpc_b64
2700 define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
2701 %cmp = icmp eq i32 %arg0, 0
2702 %neg.x = fneg half %x
2703 %neg.y = fneg half %y
2704 %select = select i1 %cmp, half %neg.x, half %neg.y
2705 %add = fadd half %select, %z
2709 ; FIXME: Terrible code for SI
2710 ; GCN-LABEL: {{^}}fadd_select_fneg_fneg_v2f16:
2716 ; SI: v_lshlrev_b32_e32
2727 ; VI: v_cmp_eq_u32_e32 vcc, 0, v0
2728 ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2729 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2730 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
2731 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2732 define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2733 %cmp = icmp eq i32 %arg0, 0
2734 %neg.x = fneg <2 x half> %x
2735 %neg.y = fneg <2 x half> %y
2736 %select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y
2737 %add = fadd <2 x half> %select, %z
2741 ; FIXME: This fneg should fold into select
2742 ; GCN-LABEL: {{^}}v_fneg_select_f32:
2744 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2745 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2746 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2747 ; GCN-NEXT: s_setpc_b64
2748 define float @v_fneg_select_f32(i32 %arg0, float %a, float %b, float %c) {
2749 %cond = icmp eq i32 %arg0, 0
2750 %select = select i1 %cond, float %a, float %b
2751 %fneg = fneg float %select
2755 ; FIXME: This fneg should fold into select
2756 ; GCN-LABEL: {{^}}v_fneg_select_2_f32:
2758 ; GCN-NSZ-NEXT: v_add_f32_e32 [[ADD2:v[0-9]+]], 2.0, v1
2759 ; GCN-NSZ-NEXT: v_add_f32_e32 [[ADD4:v[0-9]+]], 4.0, v2
2760 ; GCN-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2761 ; GCN-NSZ-NEXT: v_cndmask_b32_e32 v0, [[ADD4]], [[ADD2]], vcc
2762 ; GCN-NSZ-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2764 ; GCN-SAFE-NEXT: v_add_f32_e32 [[ADD2:v[0-9]+]], 2.0, v1
2765 ; GCN-SAFE-NEXT: v_add_f32_e32 [[ADD4:v[0-9]+]], 4.0, v2
2766 ; GCN-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2767 ; GCN-SAFE-NEXT: v_cndmask_b32_e32 v0, [[ADD4]], [[ADD2]], vcc
2768 ; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2770 ; GCN-NEXT: s_setpc_b64
2771 define float @v_fneg_select_2_f32(i32 %arg0, float %a, float %b, float %c) {
2772 %cond = icmp eq i32 %arg0, 0
2773 %add.0 = fadd float %a, 2.0
2774 %add.1 = fadd float %b, 4.0
2775 %select = select i1 %cond, float %add.0, float %add.1
2776 %neg.select = fneg float %select
2777 ret float %neg.select
2780 ; GCN-LABEL: {{^}}v_fneg_posk_select_f32:
2781 ; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
2782 ; GCN-NEXT: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
2783 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2784 define amdgpu_kernel void @v_fneg_posk_select_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
2785 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2786 %tid.ext = sext i32 %tid to i64
2787 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2788 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2789 %a = load volatile float, ptr addrspace(1) %a.gep
2790 %cond = icmp eq i32 %tid, 0
2791 %select = select i1 %cond, float 4.0, float %a
2792 %fneg = fneg float %select
2793 store float %fneg, ptr addrspace(1) %out.gep
2797 ; GCN-LABEL: {{^}}v_fneg_negk_select_f32:
2798 ; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
2799 ; GCN-NEXT: v_cndmask_b32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}, vcc
2800 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2801 define amdgpu_kernel void @v_fneg_negk_select_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
2802 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2803 %tid.ext = sext i32 %tid to i64
2804 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
2805 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2806 %a = load volatile float, ptr addrspace(1) %a.gep
2807 %cond = icmp eq i32 %tid, 0
2808 %select = select i1 %cond, float -4.0, float %a
2809 %fneg = fneg float %select
2810 store float %fneg, ptr addrspace(1) %out.gep
; --------------------------------------------------------------------------------
; Declarations of the intrinsics exercised by the fneg-combine tests above.
; --------------------------------------------------------------------------------
2814 declare i32 @llvm.amdgcn.workitem.id.x() #1
2815 declare float @llvm.fma.f32(float, float, float) #1
2816 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
2817 declare float @llvm.fmuladd.f32(float, float, float) #1
2818 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
2819 declare float @llvm.sin.f32(float) #1
2820 declare float @llvm.trunc.f32(float) #1
2821 declare float @llvm.round.f32(float) #1
2822 declare float @llvm.rint.f32(float) #1
2823 declare float @llvm.nearbyint.f32(float) #1
2824 declare float @llvm.canonicalize.f32(float) #1
2825 declare float @llvm.minnum.f32(float, float) #1
2826 declare float @llvm.maxnum.f32(float, float) #1
2827 declare half @llvm.minnum.f16(half, half) #1
2828 declare double @llvm.minnum.f64(double, double) #1
2829 declare double @llvm.fma.f64(double, double, double) #1
; AMDGPU target-specific intrinsics.
2831 declare float @llvm.amdgcn.sin.f32(float) #1
2832 declare float @llvm.amdgcn.rcp.f32(float) #1
2833 declare float @llvm.amdgcn.rcp.legacy(float) #1
2834 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2835 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2836 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
; #0: f32 denormal results/inputs may be flushed to a sign-preserving zero;
;     tests that depend on denormal flushing (e.g. flush_snan_fmul_neg1_to_fneg)
;     use this attribute set.
2838 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
; #1: side-effect-free intrinsic declarations.
2839 attributes #1 = { nounwind readnone }
; #2: permits unsafe floating-point transforms.
2840 attributes #2 = { nounwind "unsafe-fp-math"="true" }
; #3: permits transforms that may change the sign of a zero result
;     (same effect per-function as the -enable-no-signed-zeros-fp-math RUN lines).
2841 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }