1 ; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
5 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
7 ; --------------------------------------------------------------------------------
9 ; --------------------------------------------------------------------------------
11 ; GCN-LABEL: {{^}}v_fneg_add_f32:
12 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
15 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
16 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
18 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
19 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
20 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
21 %tid = call i32 @llvm.amdgcn.workitem.id.x()
22 %tid.ext = sext i32 %tid to i64
23 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
24 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
25 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
26 %a = load volatile float, float addrspace(1)* %a.gep
27 %b = load volatile float, float addrspace(1)* %b.gep
28 %add = fadd float %a, %b
29 %fneg = fsub float -0.000000e+00, %add
30 store float %fneg, float addrspace(1)* %out.gep
34 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
35 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
36 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
37 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
38 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
40 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
41 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
42 %tid = call i32 @llvm.amdgcn.workitem.id.x()
43 %tid.ext = sext i32 %tid to i64
44 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
45 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
46 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
47 %a = load volatile float, float addrspace(1)* %a.gep
48 %b = load volatile float, float addrspace(1)* %b.gep
49 %add = fadd float %a, %b
50 %fneg = fsub float -0.000000e+00, %add
51 store volatile float %fneg, float addrspace(1)* %out
52 store volatile float %add, float addrspace(1)* %out
56 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
57 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
58 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
60 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
61 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
62 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
64 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
65 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
67 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
68 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
69 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
70 %tid = call i32 @llvm.amdgcn.workitem.id.x()
71 %tid.ext = sext i32 %tid to i64
72 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
73 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
74 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
75 %a = load volatile float, float addrspace(1)* %a.gep
76 %b = load volatile float, float addrspace(1)* %b.gep
77 %add = fadd float %a, %b
78 %fneg = fsub float -0.000000e+00, %add
79 %use1 = fmul float %add, 4.0
80 store volatile float %fneg, float addrspace(1)* %out
81 store volatile float %use1, float addrspace(1)* %out
85 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
86 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
87 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
89 ; GCN-SAFE: v_sub_f32_e32
90 ; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
92 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
94 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
95 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
96 %tid = call i32 @llvm.amdgcn.workitem.id.x()
97 %tid.ext = sext i32 %tid to i64
98 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
99 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
100 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
101 %a = load volatile float, float addrspace(1)* %a.gep
102 %b = load volatile float, float addrspace(1)* %b.gep
103 %fneg.a = fsub float -0.000000e+00, %a
104 %add = fadd float %fneg.a, %b
105 %fneg = fsub float -0.000000e+00, %add
106 store volatile float %fneg, float addrspace(1)* %out
110 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
111 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
112 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
114 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
115 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
117 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
118 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
119 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
120 %tid = call i32 @llvm.amdgcn.workitem.id.x()
121 %tid.ext = sext i32 %tid to i64
122 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
123 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
124 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
125 %a = load volatile float, float addrspace(1)* %a.gep
126 %b = load volatile float, float addrspace(1)* %b.gep
127 %fneg.b = fsub float -0.000000e+00, %b
128 %add = fadd float %a, %fneg.b
129 %fneg = fsub float -0.000000e+00, %add
130 store volatile float %fneg, float addrspace(1)* %out
134 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
136 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
138 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
139 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
141 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
142 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
143 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145 %tid.ext = sext i32 %tid to i64
146 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
147 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
148 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
149 %a = load volatile float, float addrspace(1)* %a.gep
150 %b = load volatile float, float addrspace(1)* %b.gep
151 %fneg.a = fsub float -0.000000e+00, %a
152 %fneg.b = fsub float -0.000000e+00, %b
153 %add = fadd float %fneg.a, %fneg.b
154 %fneg = fsub float -0.000000e+00, %add
155 store volatile float %fneg, float addrspace(1)* %out
159 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
160 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
161 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
165 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
166 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
168 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
169 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
170 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
171 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
172 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
174 %tid.ext = sext i32 %tid to i64
175 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
176 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
177 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
178 %a = load volatile float, float addrspace(1)* %a.gep
179 %b = load volatile float, float addrspace(1)* %b.gep
180 %fneg.a = fsub float -0.000000e+00, %a
181 %add = fadd float %fneg.a, %b
182 %fneg = fsub float -0.000000e+00, %add
183 store volatile float %fneg, float addrspace(1)* %out
184 store volatile float %fneg.a, float addrspace(1)* %out
188 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
192 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
193 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
194 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
196 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
197 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
198 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
199 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
200 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
202 %tid.ext = sext i32 %tid to i64
203 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
204 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
205 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
206 %a = load volatile float, float addrspace(1)* %a.gep
207 %b = load volatile float, float addrspace(1)* %b.gep
208 %fneg.a = fsub float -0.000000e+00, %a
209 %add = fadd float %fneg.a, %b
210 %fneg = fsub float -0.000000e+00, %add
211 %use1 = fmul float %fneg.a, %c
212 store volatile float %fneg, float addrspace(1)* %out
213 store volatile float %use1, float addrspace(1)* %out
217 ; This one asserted with -enable-no-signed-zeros-fp-math
218 ; GCN-LABEL: {{^}}fneg_fadd_0:
219 ; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
220 ; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
221 ; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
222 ; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]],
223 ; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]]
225 define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
227 %tmp7 = fdiv float 1.000000e+00, %tmp6
228 %tmp8 = fmul float 0.000000e+00, %tmp7
229 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
230 %.i188 = fadd float %tmp9, 0.000000e+00
231 %tmp10 = fcmp uge float %.i188, %tmp2
232 %tmp11 = fsub float -0.000000e+00, %.i188
233 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
234 %tmp12 = fcmp ule float %.i092, 0.000000e+00
235 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
239 ; --------------------------------------------------------------------------------
241 ; --------------------------------------------------------------------------------
243 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
244 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
245 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
246 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
247 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
248 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
249 %tid = call i32 @llvm.amdgcn.workitem.id.x()
250 %tid.ext = sext i32 %tid to i64
251 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
252 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
253 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
254 %a = load volatile float, float addrspace(1)* %a.gep
255 %b = load volatile float, float addrspace(1)* %b.gep
256 %mul = fmul float %a, %b
257 %fneg = fsub float -0.000000e+00, %mul
258 store float %fneg, float addrspace(1)* %out.gep
262 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
263 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
264 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
265 ; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
266 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
267 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
268 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
269 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
270 %tid = call i32 @llvm.amdgcn.workitem.id.x()
271 %tid.ext = sext i32 %tid to i64
272 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
273 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
274 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
275 %a = load volatile float, float addrspace(1)* %a.gep
276 %b = load volatile float, float addrspace(1)* %b.gep
277 %mul = fmul float %a, %b
278 %fneg = fsub float -0.000000e+00, %mul
279 store volatile float %fneg, float addrspace(1)* %out
280 store volatile float %mul, float addrspace(1)* %out
284 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
285 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
286 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
287 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
288 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
290 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
291 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
292 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
293 %tid = call i32 @llvm.amdgcn.workitem.id.x()
294 %tid.ext = sext i32 %tid to i64
295 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
296 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
297 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
298 %a = load volatile float, float addrspace(1)* %a.gep
299 %b = load volatile float, float addrspace(1)* %b.gep
300 %mul = fmul float %a, %b
301 %fneg = fsub float -0.000000e+00, %mul
302 %use1 = fmul float %mul, 4.0
303 store volatile float %fneg, float addrspace(1)* %out
304 store volatile float %use1, float addrspace(1)* %out
308 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
309 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
310 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
311 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
312 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
313 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
314 %tid = call i32 @llvm.amdgcn.workitem.id.x()
315 %tid.ext = sext i32 %tid to i64
316 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
317 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
318 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
319 %a = load volatile float, float addrspace(1)* %a.gep
320 %b = load volatile float, float addrspace(1)* %b.gep
321 %fneg.a = fsub float -0.000000e+00, %a
322 %mul = fmul float %fneg.a, %b
323 %fneg = fsub float -0.000000e+00, %mul
324 store volatile float %fneg, float addrspace(1)* %out
328 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
329 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
330 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
331 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
332 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
333 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
334 %tid = call i32 @llvm.amdgcn.workitem.id.x()
335 %tid.ext = sext i32 %tid to i64
336 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
337 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
338 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
339 %a = load volatile float, float addrspace(1)* %a.gep
340 %b = load volatile float, float addrspace(1)* %b.gep
341 %fneg.b = fsub float -0.000000e+00, %b
342 %mul = fmul float %a, %fneg.b
343 %fneg = fsub float -0.000000e+00, %mul
344 store volatile float %fneg, float addrspace(1)* %out
348 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
349 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
350 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
351 ; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
352 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
353 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
354 %tid = call i32 @llvm.amdgcn.workitem.id.x()
355 %tid.ext = sext i32 %tid to i64
356 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
357 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
358 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
359 %a = load volatile float, float addrspace(1)* %a.gep
360 %b = load volatile float, float addrspace(1)* %b.gep
361 %fneg.a = fsub float -0.000000e+00, %a
362 %fneg.b = fsub float -0.000000e+00, %b
363 %mul = fmul float %fneg.a, %fneg.b
364 %fneg = fsub float -0.000000e+00, %mul
365 store volatile float %fneg, float addrspace(1)* %out
369 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
370 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
371 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
372 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
373 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
375 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
376 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
377 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
378 %tid = call i32 @llvm.amdgcn.workitem.id.x()
379 %tid.ext = sext i32 %tid to i64
380 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
381 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
382 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
383 %a = load volatile float, float addrspace(1)* %a.gep
384 %b = load volatile float, float addrspace(1)* %b.gep
385 %fneg.a = fsub float -0.000000e+00, %a
386 %mul = fmul float %fneg.a, %b
387 %fneg = fsub float -0.000000e+00, %mul
388 store volatile float %fneg, float addrspace(1)* %out
389 store volatile float %fneg.a, float addrspace(1)* %out
393 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
394 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
395 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
396 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
397 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
398 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
399 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
400 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
401 %tid = call i32 @llvm.amdgcn.workitem.id.x()
402 %tid.ext = sext i32 %tid to i64
403 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
404 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
405 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
406 %a = load volatile float, float addrspace(1)* %a.gep
407 %b = load volatile float, float addrspace(1)* %b.gep
408 %fneg.a = fsub float -0.000000e+00, %a
409 %mul = fmul float %fneg.a, %b
410 %fneg = fsub float -0.000000e+00, %mul
411 %use1 = fmul float %fneg.a, %c
412 store volatile float %fneg, float addrspace(1)* %out
413 store volatile float %use1, float addrspace(1)* %out
417 ; --------------------------------------------------------------------------------
419 ; --------------------------------------------------------------------------------
421 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
422 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
423 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
424 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
425 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
426 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
427 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
428 define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
429 %tid = call i32 @llvm.amdgcn.workitem.id.x()
430 %tid.ext = sext i32 %tid to i64
431 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
432 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
433 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
434 %a = load volatile float, float addrspace(1)* %a.gep
435 %b = load volatile float, float addrspace(1)* %b.gep
436 %min = call float @llvm.minnum.f32(float %a, float %b)
437 %fneg = fsub float -0.000000e+00, %min
438 store float %fneg, float addrspace(1)* %out.gep
442 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
445 ; GCN: v_max_f32_e64 v0, -v0, -v1
447 define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
448 %min = call float @llvm.minnum.f32(float %a, float %b)
449 %fneg = fsub float -0.000000e+00, %min
453 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
454 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
455 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
456 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
457 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
458 define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
459 %tid = call i32 @llvm.amdgcn.workitem.id.x()
460 %tid.ext = sext i32 %tid to i64
461 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
462 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
463 %a = load volatile float, float addrspace(1)* %a.gep
464 %min = call float @llvm.minnum.f32(float %a, float %a)
465 %min.fneg = fsub float -0.0, %min
466 store float %min.fneg, float addrspace(1)* %out.gep
470 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
472 ; GCN: v_max_f32_e64 v0, -v0, -v0
474 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
475 %min = call float @llvm.minnum.f32(float %a, float %a)
476 %min.fneg = fsub float -0.0, %min
480 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
481 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
482 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
483 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
484 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
485 define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
486 %tid = call i32 @llvm.amdgcn.workitem.id.x()
487 %tid.ext = sext i32 %tid to i64
488 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
489 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
490 %a = load volatile float, float addrspace(1)* %a.gep
491 %min = call float @llvm.minnum.f32(float 4.0, float %a)
492 %fneg = fsub float -0.000000e+00, %min
493 store float %fneg, float addrspace(1)* %out.gep
497 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
499 ; GCN: v_max_f32_e64 v0, -v0, -4.0
501 define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
502 %min = call float @llvm.minnum.f32(float 4.0, float %a)
503 %fneg = fsub float -0.000000e+00, %min
507 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
508 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
509 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
510 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
511 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
512 define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
513 %tid = call i32 @llvm.amdgcn.workitem.id.x()
514 %tid.ext = sext i32 %tid to i64
515 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
516 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
517 %a = load volatile float, float addrspace(1)* %a.gep
518 %min = call float @llvm.minnum.f32(float -4.0, float %a)
519 %fneg = fsub float -0.000000e+00, %min
520 store float %fneg, float addrspace(1)* %out.gep
524 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
526 ; GCN: v_max_f32_e64 v0, -v0, 4.0
528 define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
529 %min = call float @llvm.minnum.f32(float -4.0, float %a)
530 %fneg = fsub float -0.000000e+00, %min
534 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
535 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
536 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
537 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
538 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
539 %tid = call i32 @llvm.amdgcn.workitem.id.x()
540 %tid.ext = sext i32 %tid to i64
541 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
542 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
543 %a = load volatile float, float addrspace(1)* %a.gep
544 %min = call float @llvm.minnum.f32(float 0.0, float %a)
545 %fneg = fsub float -0.000000e+00, %min
546 store float %fneg, float addrspace(1)* %out.gep
550 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
551 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
552 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
553 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
554 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
555 define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
556 %tid = call i32 @llvm.amdgcn.workitem.id.x()
557 %tid.ext = sext i32 %tid to i64
558 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
559 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
560 %a = load volatile float, float addrspace(1)* %a.gep
561 %min = call float @llvm.minnum.f32(float -0.0, float %a)
562 %fneg = fsub float -0.000000e+00, %min
563 store float %fneg, float addrspace(1)* %out.gep
567 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
568 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
570 ; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
571 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
573 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
574 ; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
575 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
577 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
578 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
579 %tid = call i32 @llvm.amdgcn.workitem.id.x()
580 %tid.ext = sext i32 %tid to i64
581 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
582 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
583 %a = load volatile float, float addrspace(1)* %a.gep
584 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
585 %fneg = fsub float -0.000000e+00, %min
586 store float %fneg, float addrspace(1)* %out.gep
590 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
591 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
593 ; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
594 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
596 ; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
597 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
599 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
600 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
601 %tid = call i32 @llvm.amdgcn.workitem.id.x()
602 %tid.ext = sext i32 %tid to i64
603 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
604 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
605 %a = load volatile float, float addrspace(1)* %a.gep
606 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
607 %fneg = fsub float -0.000000e+00, %min
608 store float %fneg, float addrspace(1)* %out.gep
612 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
613 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
615 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
616 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
617 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
619 ; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
620 ; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
621 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
623 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
624 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
625 %tid = call i32 @llvm.amdgcn.workitem.id.x()
626 %tid.ext = sext i32 %tid to i64
627 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
628 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
629 %a = load volatile half, half addrspace(1)* %a.gep
630 %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
631 %fneg = fsub half -0.000000e+00, %min
632 store half %fneg, half addrspace(1)* %out.gep
636 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
637 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
639 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
640 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
641 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
643 ; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
644 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
646 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
647 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
648 %tid = call i32 @llvm.amdgcn.workitem.id.x()
649 %tid.ext = sext i32 %tid to i64
650 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
651 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
652 %a = load volatile half, half addrspace(1)* %a.gep
653 %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
654 %fneg = fsub half -0.000000e+00, %min
655 store half %fneg, half addrspace(1)* %out.gep
659 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
660 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
662 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
663 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
664 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
665 ; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
667 ; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
668 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
670 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
671 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
672 %tid = call i32 @llvm.amdgcn.workitem.id.x()
673 %tid.ext = sext i32 %tid to i64
674 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
675 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
676 %a = load volatile double, double addrspace(1)* %a.gep
677 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
678 %fneg = fsub double -0.000000e+00, %min
679 store double %fneg, double addrspace(1)* %out.gep
683 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
684 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
686 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
687 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
688 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
689 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
691 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
692 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
694 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
695 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
696 %tid = call i32 @llvm.amdgcn.workitem.id.x()
697 %tid.ext = sext i32 %tid to i64
698 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
699 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
700 %a = load volatile double, double addrspace(1)* %a.gep
701 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
702 %fneg = fsub double -0.000000e+00, %min
703 store double %fneg, double addrspace(1)* %out.gep
707 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
709 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
711 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
712 %min = call float @llvm.minnum.f32(float -0.0, float %a)
713 %fneg = fsub float -0.000000e+00, %min
717 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
718 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
719 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
720 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
721 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
722 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
723 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
724 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
725 %tid = call i32 @llvm.amdgcn.workitem.id.x()
726 %tid.ext = sext i32 %tid to i64
727 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
728 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
729 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
730 %a = load volatile float, float addrspace(1)* %a.gep
731 %b = load volatile float, float addrspace(1)* %b.gep
732 %min = call float @llvm.minnum.f32(float 0.0, float %a)
733 %fneg = fsub float -0.000000e+00, %min
734 %mul = fmul float %fneg, %b
735 store float %mul, float addrspace(1)* %out.gep
739 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
740 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
741 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
743 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
745 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
746 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
748 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
749 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
750 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
752 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
753 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
754 %tid = call i32 @llvm.amdgcn.workitem.id.x()
755 %tid.ext = sext i32 %tid to i64
756 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
757 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
758 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
759 %a = load volatile float, float addrspace(1)* %a.gep
760 %b = load volatile float, float addrspace(1)* %b.gep
761 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
762 %fneg = fsub float -0.000000e+00, %min
763 %mul = fmul float %fneg, %b
764 store float %mul, float addrspace(1)* %out.gep
768 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
771 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
772 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
774 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
775 %min = call float @llvm.minnum.f32(float 0.0, float %a)
776 %fneg = fsub float -0.000000e+00, %min
777 %mul = fmul float %fneg, %b
781 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
782 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
783 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
784 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
785 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
786 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
787 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
788 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
789 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
790 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
791 %tid = call i32 @llvm.amdgcn.workitem.id.x()
792 %tid.ext = sext i32 %tid to i64
793 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
794 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
795 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
796 %a = load volatile float, float addrspace(1)* %a.gep
797 %b = load volatile float, float addrspace(1)* %b.gep
798 %min = call float @llvm.minnum.f32(float %a, float %b)
799 %fneg = fsub float -0.000000e+00, %min
800 %use1 = fmul float %min, 4.0
801 store volatile float %fneg, float addrspace(1)* %out
802 store volatile float %use1, float addrspace(1)* %out
806 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
809 ; GCN: v_max_f32_e64 v0, -v0, -v1
810 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
812 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
813 %min = call float @llvm.minnum.f32(float %a, float %b)
814 %fneg = fsub float -0.000000e+00, %min
815 %use1 = fmul float %min, 4.0
816 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
817 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
818 ret <2 x float> %ins1
821 ; --------------------------------------------------------------------------------
823 ; --------------------------------------------------------------------------------
826 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
827 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
828 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
829 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
830 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
831 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
832 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
833 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
834 %tid = call i32 @llvm.amdgcn.workitem.id.x()
835 %tid.ext = sext i32 %tid to i64
836 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
837 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
838 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
839 %a = load volatile float, float addrspace(1)* %a.gep
840 %b = load volatile float, float addrspace(1)* %b.gep
841 %max = call float @llvm.maxnum.f32(float %a, float %b)
842 %fneg = fsub float -0.000000e+00, %max
843 store float %fneg, float addrspace(1)* %out.gep
847 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
850 ; GCN: v_min_f32_e64 v0, -v0, -v1
852 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
853 %max = call float @llvm.maxnum.f32(float %a, float %b)
854 %fneg = fsub float -0.000000e+00, %max
858 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
859 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
860 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
861 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
862 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
863 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
864 %tid = call i32 @llvm.amdgcn.workitem.id.x()
865 %tid.ext = sext i32 %tid to i64
866 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
867 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
868 %a = load volatile float, float addrspace(1)* %a.gep
869 %max = call float @llvm.maxnum.f32(float %a, float %a)
870 %max.fneg = fsub float -0.0, %max
871 store float %max.fneg, float addrspace(1)* %out.gep
875 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
877 ; GCN: v_min_f32_e64 v0, -v0, -v0
879 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
880 %max = call float @llvm.maxnum.f32(float %a, float %a)
881 %max.fneg = fsub float -0.0, %max
885 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
886 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
887 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
888 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
889 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
890 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
891 %tid = call i32 @llvm.amdgcn.workitem.id.x()
892 %tid.ext = sext i32 %tid to i64
893 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
894 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
895 %a = load volatile float, float addrspace(1)* %a.gep
896 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
897 %fneg = fsub float -0.000000e+00, %max
898 store float %fneg, float addrspace(1)* %out.gep
902 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
904 ; GCN: v_min_f32_e64 v0, -v0, -4.0
906 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
907 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
908 %fneg = fsub float -0.000000e+00, %max
912 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
913 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
914 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
915 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
916 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
917 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
918 %tid = call i32 @llvm.amdgcn.workitem.id.x()
919 %tid.ext = sext i32 %tid to i64
920 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
921 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
922 %a = load volatile float, float addrspace(1)* %a.gep
923 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
924 %fneg = fsub float -0.000000e+00, %max
925 store float %fneg, float addrspace(1)* %out.gep
929 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
931 ; GCN: v_min_f32_e64 v0, -v0, 4.0
933 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
934 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
935 %fneg = fsub float -0.000000e+00, %max
939 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
940 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
941 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
942 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
943 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
944 %tid = call i32 @llvm.amdgcn.workitem.id.x()
945 %tid.ext = sext i32 %tid to i64
946 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
947 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
948 %a = load volatile float, float addrspace(1)* %a.gep
949 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
950 %fneg = fsub float -0.000000e+00, %max
951 store float %fneg, float addrspace(1)* %out.gep
955 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
956 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
957 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
958 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
959 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
960 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
961 %tid = call i32 @llvm.amdgcn.workitem.id.x()
962 %tid.ext = sext i32 %tid to i64
963 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
964 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
965 %a = load volatile float, float addrspace(1)* %a.gep
966 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
967 %fneg = fsub float -0.000000e+00, %max
968 store float %fneg, float addrspace(1)* %out.gep
972 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
974 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
976 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
977 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
978 %fneg = fsub float -0.000000e+00, %max
982 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
983 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
984 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
985 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
986 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
987 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
988 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
989 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
990 %tid = call i32 @llvm.amdgcn.workitem.id.x()
991 %tid.ext = sext i32 %tid to i64
992 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
993 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
994 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
995 %a = load volatile float, float addrspace(1)* %a.gep
996 %b = load volatile float, float addrspace(1)* %b.gep
997 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
998 %fneg = fsub float -0.000000e+00, %max
999 %mul = fmul float %fneg, %b
1000 store float %mul, float addrspace(1)* %out.gep
1004 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1007 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
1008 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
1009 ; GCN-NEXT: ; return
1010 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
1011 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1012 %fneg = fsub float -0.000000e+00, %max
1013 %mul = fmul float %fneg, %b
1017 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1018 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1019 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1020 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
1021 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1022 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1023 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1024 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1025 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1026 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1027 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1028 %tid.ext = sext i32 %tid to i64
1029 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1030 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1031 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1032 %a = load volatile float, float addrspace(1)* %a.gep
1033 %b = load volatile float, float addrspace(1)* %b.gep
1034 %max = call float @llvm.maxnum.f32(float %a, float %b)
1035 %fneg = fsub float -0.000000e+00, %max
1036 %use1 = fmul float %max, 4.0
1037 store volatile float %fneg, float addrspace(1)* %out
1038 store volatile float %use1, float addrspace(1)* %out
1042 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1045 ; GCN: v_min_f32_e64 v0, -v0, -v1
1046 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1047 ; GCN-NEXT: ; return
1048 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1049 %max = call float @llvm.maxnum.f32(float %a, float %b)
1050 %fneg = fsub float -0.000000e+00, %max
1051 %use1 = fmul float %max, 4.0
1052 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1053 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1054 ret <2 x float> %ins1
1057 ; --------------------------------------------------------------------------------
1059 ; --------------------------------------------------------------------------------
1061 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1062 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1063 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1064 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1066 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1067 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1069 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1070 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1071 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1072 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1073 %tid.ext = sext i32 %tid to i64
1074 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1075 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1076 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1077 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1078 %a = load volatile float, float addrspace(1)* %a.gep
1079 %b = load volatile float, float addrspace(1)* %b.gep
1080 %c = load volatile float, float addrspace(1)* %c.gep
1081 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1082 %fneg = fsub float -0.000000e+00, %fma
1083 store float %fneg, float addrspace(1)* %out.gep
1087 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1088 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1089 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1090 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1091 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1092 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1093 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1094 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1095 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1096 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1097 %tid.ext = sext i32 %tid to i64
1098 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1099 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1100 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1101 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1102 %a = load volatile float, float addrspace(1)* %a.gep
1103 %b = load volatile float, float addrspace(1)* %b.gep
1104 %c = load volatile float, float addrspace(1)* %c.gep
1105 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1106 %fneg = fsub float -0.000000e+00, %fma
1107 store volatile float %fneg, float addrspace(1)* %out
1108 store volatile float %fma, float addrspace(1)* %out
1112 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1113 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1114 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1115 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1117 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1118 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1119 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1121 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1122 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1124 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1125 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1126 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1127 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1128 %tid.ext = sext i32 %tid to i64
1129 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1130 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1131 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1132 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1133 %a = load volatile float, float addrspace(1)* %a.gep
1134 %b = load volatile float, float addrspace(1)* %b.gep
1135 %c = load volatile float, float addrspace(1)* %c.gep
1136 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1137 %fneg = fsub float -0.000000e+00, %fma
1138 %use1 = fmul float %fma, 4.0
1139 store volatile float %fneg, float addrspace(1)* %out
1140 store volatile float %use1, float addrspace(1)* %out
1144 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1145 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1146 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1147 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1149 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1150 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1152 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1153 ; GCN-NSZ-NOT: [[FMA]]
1154 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1155 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1156 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1157 %tid.ext = sext i32 %tid to i64
1158 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1159 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1160 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1161 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1162 %a = load volatile float, float addrspace(1)* %a.gep
1163 %b = load volatile float, float addrspace(1)* %b.gep
1164 %c = load volatile float, float addrspace(1)* %c.gep
1165 %fneg.a = fsub float -0.000000e+00, %a
1166 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1167 %fneg = fsub float -0.000000e+00, %fma
1168 store volatile float %fneg, float addrspace(1)* %out
1172 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1173 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1174 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1175 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1177 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1178 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1180 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1181 ; GCN-NSZ-NOT: [[FMA]]
1182 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1183 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1184 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1185 %tid.ext = sext i32 %tid to i64
1186 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1187 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1188 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1189 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1190 %a = load volatile float, float addrspace(1)* %a.gep
1191 %b = load volatile float, float addrspace(1)* %b.gep
1192 %c = load volatile float, float addrspace(1)* %c.gep
1193 %fneg.b = fsub float -0.000000e+00, %b
1194 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1195 %fneg = fsub float -0.000000e+00, %fma
1196 store volatile float %fneg, float addrspace(1)* %out
1200 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1201 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1202 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1203 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1205 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
1206 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1208 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1209 ; GCN-NSZ-NOT: [[FMA]]
1210 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1211 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1212 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1213 %tid.ext = sext i32 %tid to i64
1214 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1215 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1216 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1217 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1218 %a = load volatile float, float addrspace(1)* %a.gep
1219 %b = load volatile float, float addrspace(1)* %b.gep
1220 %c = load volatile float, float addrspace(1)* %c.gep
1221 %fneg.a = fsub float -0.000000e+00, %a
1222 %fneg.b = fsub float -0.000000e+00, %b
1223 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1224 %fneg = fsub float -0.000000e+00, %fma
1225 store volatile float %fneg, float addrspace(1)* %out
1229 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1230 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1231 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1232 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1234 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1235 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1237 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1238 ; GCN-NSZ-NOT: [[FMA]]
1239 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1240 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1241 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1242 %tid.ext = sext i32 %tid to i64
1243 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1244 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1245 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1246 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1247 %a = load volatile float, float addrspace(1)* %a.gep
1248 %b = load volatile float, float addrspace(1)* %b.gep
1249 %c = load volatile float, float addrspace(1)* %c.gep
1250 %fneg.a = fsub float -0.000000e+00, %a
1251 %fneg.c = fsub float -0.000000e+00, %c
1252 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1253 %fneg = fsub float -0.000000e+00, %fma
1254 store volatile float %fneg, float addrspace(1)* %out
1258 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1259 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1260 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1261 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1263 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1264 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1266 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1267 ; GCN-NSZ-NOT: [[FMA]]
1268 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1269 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1270 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1271 %tid.ext = sext i32 %tid to i64
1272 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1273 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1274 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1275 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1276 %a = load volatile float, float addrspace(1)* %a.gep
1277 %b = load volatile float, float addrspace(1)* %b.gep
1278 %c = load volatile float, float addrspace(1)* %c.gep
1279 %fneg.c = fsub float -0.000000e+00, %c
1280 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1281 %fneg = fsub float -0.000000e+00, %fma
1282 store volatile float %fneg, float addrspace(1)* %out
; Test: fneg(fma(fneg(a), b, c)) where fneg(a) also has its own store use.
; The negation of a must still be materialized (xor with the sign-bit mask
; 0x80000000) for the second store, even though the fma folds its modifiers.
1286 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1287 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1288 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1289 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1291 ; GCN-SAFE: v_xor_b32
1292 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1293 ; GCN-SAFE: v_xor_b32
1295 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1296 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1298 ; GCN-NSZ-NOT: [[FMA]]
1299 ; GCN-NSZ-NOT: [[NEG_A]]
1300 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1301 ; GCN-NSZ-NOT: [[NEG_A]]
1302 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1303 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1304   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1305   %tid.ext = sext i32 %tid to i64
1306   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1307   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1308   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1309   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1310   %a = load volatile float, float addrspace(1)* %a.gep
1311   %b = load volatile float, float addrspace(1)* %b.gep
1312   %c = load volatile float, float addrspace(1)* %c.gep
1313   %fneg.a = fsub float -0.000000e+00, %a
1314   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1315   %fneg = fsub float -0.000000e+00, %fma
1316   store volatile float %fneg, float addrspace(1)* %out
1317   store volatile float %fneg.a, float addrspace(1)* %out
; Test: fneg(fma(fneg(a), b, c)) where fneg(a) is additionally multiplied by
; the scalar kernel argument d. The negate of a is foldable as a source
; modifier on the mul, so no standalone xor for it is expected.
1321 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1322 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1323 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1324 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1326 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1327 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
1328 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1330 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1331 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1332 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1333 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
1334   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1335   %tid.ext = sext i32 %tid to i64
1336   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1337   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1338   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1339   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1340   %a = load volatile float, float addrspace(1)* %a.gep
1341   %b = load volatile float, float addrspace(1)* %b.gep
1342   %c = load volatile float, float addrspace(1)* %c.gep
1343   %fneg.a = fsub float -0.000000e+00, %a
1344   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1345   %fneg = fsub float -0.000000e+00, %fma
1346   %use1 = fmul float %fneg.a, %d
1347   store volatile float %fneg, float addrspace(1)* %out
1348   store volatile float %use1, float addrspace(1)* %out
1352 ; --------------------------------------------------------------------------------
; fmuladd tests
1354 ; --------------------------------------------------------------------------------
; Test: fneg(fmuladd(a, b, c)). The safe run keeps the mac result and negates
; it with an xor; with no-signed-zeros the negate distributes into a single
; v_mad with negated b and c operands.
1356 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1357 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1358 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1359 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1361 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1362 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1364 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1365 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1366 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1367   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1368   %tid.ext = sext i32 %tid to i64
1369   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1370   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1371   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1372   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1373   %a = load volatile float, float addrspace(1)* %a.gep
1374   %b = load volatile float, float addrspace(1)* %b.gep
1375   %c = load volatile float, float addrspace(1)* %c.gep
1376   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1377   %fneg = fsub float -0.000000e+00, %fma
1378   store float %fneg, float addrspace(1)* %out.gep
; Test: fneg(fmuladd(a, b, c)) where the un-negated fmuladd result is also
; multiplied by 4.0. In the nsz run the mad itself is negated and the second
; use compensates by multiplying with -4.0 instead.
1382 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1383 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1384 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1385 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1387 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1388 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1389 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1391 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
1392 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1394 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1395 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1396 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1397   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1398   %tid.ext = sext i32 %tid to i64
1399   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1400   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1401   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1402   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1403   %a = load volatile float, float addrspace(1)* %a.gep
1404   %b = load volatile float, float addrspace(1)* %b.gep
1405   %c = load volatile float, float addrspace(1)* %c.gep
1406   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1407   %fneg = fsub float -0.000000e+00, %fma
1408   %use1 = fmul float %fma, 4.0
1409   store volatile float %fneg, float addrspace(1)* %out
1410   store volatile float %use1, float addrspace(1)* %out
1414 ; --------------------------------------------------------------------------------
; fp_extend tests
1416 ; --------------------------------------------------------------------------------
; Test: fneg(fpext f32 -> f64) folds into the source-modifier form of the
; conversion instruction (no separate negate).
1418 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1419 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1420 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1421 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1422 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1423   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1424   %tid.ext = sext i32 %tid to i64
1425   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1426   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1427   %a = load volatile float, float addrspace(1)* %a.gep
1428   %fpext = fpext float %a to double
1429   %fneg = fsub double -0.000000e+00, %fpext
1430   store double %fneg, double addrspace(1)* %out.gep
; Test: fneg(fpext(fneg(a))) -- the two negations cancel through the extend,
; leaving a plain conversion with no modifiers.
1434 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1435 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1436 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1437 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1438 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1439   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1440   %tid.ext = sext i32 %tid to i64
1441   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1442   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1443   %a = load volatile float, float addrspace(1)* %a.gep
1444   %fneg.a = fsub float -0.000000e+00, %a
1445   %fpext = fpext float %fneg.a to double
1446   %fneg = fsub double -0.000000e+00, %fpext
1447   store double %fneg, double addrspace(1)* %out.gep
; Test: like the previous case, but fneg(a) also has its own store use, so
; the negated f32 value must still be materialized with an explicit xor
; while the conversion itself stays modifier-free.
1451 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1452 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1453 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1454 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1455 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1456 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
1457 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1458   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1459   %tid.ext = sext i32 %tid to i64
1460   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1461   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1462   %a = load volatile float, float addrspace(1)* %a.gep
1463   %fneg.a = fsub float -0.000000e+00, %a
1464   %fpext = fpext float %fneg.a to double
1465   %fneg = fsub double -0.000000e+00, %fpext
1466   store volatile double %fneg, double addrspace(1)* %out.gep
1467   store volatile float %fneg.a, float addrspace(1)* undef
; Test: the fpext result is used both negated and un-negated. Negating an
; f64 only requires flipping the sign bit of the high dword, so a single xor
; on the high half of the converted pair is expected.
1471 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1472 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1473 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1474 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1475 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1476 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1477 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1478   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1479   %tid.ext = sext i32 %tid to i64
1480   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1481   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1482   %a = load volatile float, float addrspace(1)* %a.gep
1483   %fpext = fpext float %a to double
1484   %fneg = fsub double -0.000000e+00, %fpext
1485   store volatile double %fneg, double addrspace(1)* %out.gep
1486   store volatile double %fpext, double addrspace(1)* undef
; Test: the fpext result is negated (stored) and also multiplied by 4.0.
; The negate is a high-dword xor; the mul consumes the un-negated converted
; pair directly, so both uses come from the same conversion.
1490 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1491 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1492 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1493 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1494 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1495 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1496 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1497 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1498   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1499   %tid.ext = sext i32 %tid to i64
1500   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1501   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1502   %a = load volatile float, float addrspace(1)* %a.gep
1503   %fpext = fpext float %a to double
1504   %fneg = fsub double -0.000000e+00, %fpext
1505   %mul = fmul double %fpext, 4.0
1506   store volatile double %fneg, double addrspace(1)* %out.gep
1507   store volatile double %mul, double addrspace(1)* %out.gep
; Test: f16 -> f32 variant of the multi-use extend case. Only the label is
; checked; per the FIXME below, source modifiers are not yet folded for
; f16 -> f32 conversions, so no instruction-level expectations are pinned.
1511 ; FIXME: Source modifiers not folded for f16->f32
1512 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1513 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1514   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1515   %tid.ext = sext i32 %tid to i64
1516   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1517   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1518   %a = load volatile half, half addrspace(1)* %a.gep
1519   %fpext = fpext half %a to float
1520   %fneg = fsub float -0.000000e+00, %fpext
1521   store volatile float %fneg, float addrspace(1)* %out.gep
1522   store volatile float %fpext, float addrspace(1)* %out.gep
; Test: f16 -> f32 variant with a foldable second use (mul by 4.0). As with
; the previous f16 case, only the label is checked.
1526 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1527 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1528   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1529   %tid.ext = sext i32 %tid to i64
1530   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1531   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1532   %a = load volatile half, half addrspace(1)* %a.gep
1533   %fpext = fpext half %a to float
1534   %fneg = fsub float -0.000000e+00, %fpext
1535   %mul = fmul float %fpext, 4.0
1536   store volatile float %fneg, float addrspace(1)* %out.gep
1537   store volatile float %mul, float addrspace(1)* %out.gep
1541 ; --------------------------------------------------------------------------------
; fp_round tests
1543 ; --------------------------------------------------------------------------------
; Test: fneg(fptrunc f64 -> f32) folds into the source-modifier form of the
; conversion instruction.
1545 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1546 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1547 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1548 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1549 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1550   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1551   %tid.ext = sext i32 %tid to i64
1552   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1553   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1554   %a = load volatile double, double addrspace(1)* %a.gep
1555   %fpround = fptrunc double %a to float
1556   %fneg = fsub float -0.000000e+00, %fpround
1557   store float %fneg, float addrspace(1)* %out.gep
; Test: fneg(fptrunc(fneg(a))) -- the negations cancel through the round,
; leaving a plain conversion.
1561 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1562 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1563 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1564 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1565 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1566   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1567   %tid.ext = sext i32 %tid to i64
1568   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1569   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1570   %a = load volatile double, double addrspace(1)* %a.gep
1571   %fneg.a = fsub double -0.000000e+00, %a
1572   %fpround = fptrunc double %fneg.a to float
1573   %fneg = fsub float -0.000000e+00, %fpround
1574   store float %fneg, float addrspace(1)* %out.gep
; Test: the cancelled-negate round, but fneg(a) is also stored as an f64.
; The stored double is rebuilt from the original low dword plus a sign-bit
; xor of the high dword; the conversion stays modifier-free.
1578 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1579 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1580 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1581 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1582 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1583 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1584 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1585   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1586   %tid.ext = sext i32 %tid to i64
1587   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1588   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1589   %a = load volatile double, double addrspace(1)* %a.gep
1590   %fneg.a = fsub double -0.000000e+00, %a
1591   %fpround = fptrunc double %fneg.a to float
1592   %fneg = fsub float -0.000000e+00, %fpround
1593   store volatile float %fneg, float addrspace(1)* %out.gep
1594   store volatile double %fneg.a, double addrspace(1)* undef
; Test: fneg(a) is consumed by the round (negates cancel) and by an f64 mul
; with a scalar operand, where it folds as a source modifier on the mul.
1598 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1599 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1600 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1601 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1603 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1604 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1605 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1606   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1607   %tid.ext = sext i32 %tid to i64
1608   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1609   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1610   %a = load volatile double, double addrspace(1)* %a.gep
1611   %fneg.a = fsub double -0.000000e+00, %a
1612   %fpround = fptrunc double %fneg.a to float
1613   %fneg = fsub float -0.000000e+00, %fpround
1614   %use1 = fmul double %fneg.a, %c
1615   store volatile float %fneg, float addrspace(1)* %out.gep
1616   store volatile double %use1, double addrspace(1)* undef
; Test: fneg(fptrunc f32 -> f16) folds into the conversion's source modifier.
1620 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1621 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1622 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1623 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1624 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1625   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1626   %tid.ext = sext i32 %tid to i64
1627   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1628   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1629   %a = load volatile float, float addrspace(1)* %a.gep
1630   %fpround = fptrunc float %a to half
1631   %fneg = fsub half -0.000000e+00, %fpround
1632   store half %fneg, half addrspace(1)* %out.gep
; Test: fneg(fptrunc(fneg(a)) f32 -> f16) -- the negations cancel, leaving a
; plain conversion.
1636 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1637 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1638 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1639 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1640 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1641   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1642   %tid.ext = sext i32 %tid to i64
1643   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1644   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1645   %a = load volatile float, float addrspace(1)* %a.gep
1646   %fneg.a = fsub float -0.000000e+00, %a
1647   %fpround = fptrunc float %fneg.a to half
1648   %fneg = fsub half -0.000000e+00, %fpround
1649   store half %fneg, half addrspace(1)* %out.gep
; Test: the rounded value is used both negated and un-negated, so the
; conversion result is kept and its negation is a single sign-bit xor.
1653 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1654 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1655 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1656 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1657 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1658 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
1659 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1660   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1661   %tid.ext = sext i32 %tid to i64
1662   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1663   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1664   %a = load volatile double, double addrspace(1)* %a.gep
1665   %fpround = fptrunc double %a to float
1666   %fneg = fsub float -0.000000e+00, %fpround
1667   store volatile float %fneg, float addrspace(1)* %out.gep
1668   store volatile float %fpround, float addrspace(1)* %out.gep
; Test: f32 -> f16 cancelled-negate round where fneg(a) is also stored; the
; negated f32 must be materialized with an explicit xor for its store.
1672 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1673 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1674 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1675 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1676 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1677 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1678 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1679   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1680   %tid.ext = sext i32 %tid to i64
1681   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1682   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1683   %a = load volatile float, float addrspace(1)* %a.gep
1684   %fneg.a = fsub float -0.000000e+00, %a
1685   %fpround = fptrunc float %fneg.a to half
1686   %fneg = fsub half -0.000000e+00, %fpround
1687   store volatile half %fneg, half addrspace(1)* %out.gep
1688   store volatile float %fneg.a, float addrspace(1)* undef
; Test: fneg(a) feeds the f32 -> f16 round (negates cancel) and an f32 mul
; with a scalar operand, where it folds as a source modifier.
1692 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1693 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1694 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1695 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1696 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1697 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1698 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1699   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1700   %tid.ext = sext i32 %tid to i64
1701   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1702   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1703   %a = load volatile float, float addrspace(1)* %a.gep
1704   %fneg.a = fsub float -0.000000e+00, %a
1705   %fpround = fptrunc float %fneg.a to half
1706   %fneg = fsub half -0.000000e+00, %fpround
1707   %use1 = fmul float %fneg.a, %c
1708   store volatile half %fneg, half addrspace(1)* %out.gep
1709   store volatile float %use1, float addrspace(1)* undef
1713 ; --------------------------------------------------------------------------------
; rcp tests
1715 ; --------------------------------------------------------------------------------
; Test: fneg(rcp(a)) folds into the source-modifier form of v_rcp.
1717 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1718 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1719 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1720 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1721 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1722   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1723   %tid.ext = sext i32 %tid to i64
1724   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1725   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1726   %a = load volatile float, float addrspace(1)* %a.gep
1727   %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1728   %fneg = fsub float -0.000000e+00, %rcp
1729   store float %fneg, float addrspace(1)* %out.gep
; Test: fneg(rcp(fneg(a))) -- the two negations cancel, leaving a plain
; v_rcp with no modifiers.
1733 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1734 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1735 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1736 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1737 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1738   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1739   %tid.ext = sext i32 %tid to i64
1740   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1741   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1742   %a = load volatile float, float addrspace(1)* %a.gep
1743   %fneg.a = fsub float -0.000000e+00, %a
1744   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1745   %fneg = fsub float -0.000000e+00, %rcp
1746   store float %fneg, float addrspace(1)* %out.gep
; Test: the cancelled-negate rcp, but fneg(a) is also stored, so the negated
; value must still be materialized with an explicit xor.
1750 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1751 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1752 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1753 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1754 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1755 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1756 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1757   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1758   %tid.ext = sext i32 %tid to i64
1759   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1760   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1761   %a = load volatile float, float addrspace(1)* %a.gep
1762   %fneg.a = fsub float -0.000000e+00, %a
1763   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1764   %fneg = fsub float -0.000000e+00, %rcp
1765   store volatile float %fneg, float addrspace(1)* %out.gep
1766   store volatile float %fneg.a, float addrspace(1)* undef
; Test: fneg(a) feeds the rcp (negates cancel) and a mul with a scalar
; operand, where it folds as a source modifier on the mul.
1770 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1771 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1772 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1773 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1774 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1775 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1776 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1777   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1778   %tid.ext = sext i32 %tid to i64
1779   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1780   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1781   %a = load volatile float, float addrspace(1)* %a.gep
1782   %fneg.a = fsub float -0.000000e+00, %a
1783   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1784   %fneg = fsub float -0.000000e+00, %rcp
1785   %use1 = fmul float %fneg.a, %c
1786   store volatile float %fneg, float addrspace(1)* %out.gep
1787   store volatile float %use1, float addrspace(1)* undef
1791 ; --------------------------------------------------------------------------------
; fmul_legacy tests
1793 ; --------------------------------------------------------------------------------
; Test: fneg(mul_legacy(a, b)) folds the negate as a source modifier on one
; operand of v_mul_legacy.
1795 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1796 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1797 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1798 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1799 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1800 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1801   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1802   %tid.ext = sext i32 %tid to i64
1803   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1804   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1805   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1806   %a = load volatile float, float addrspace(1)* %a.gep
1807   %b = load volatile float, float addrspace(1)* %b.gep
1808   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1809   %fneg = fsub float -0.000000e+00, %mul
1810   store float %fneg, float addrspace(1)* %out.gep
; Test: the mul_legacy result is used both negated and un-negated, so it is
; kept and the negation is a single sign-bit xor. (The [[ADD]] capture name
; is a holdover from the parallel add tests; it binds the mul result.)
1814 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1815 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1816 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1817 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1818 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1819 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1820 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1821 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1822   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1823   %tid.ext = sext i32 %tid to i64
1824   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1825   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1826   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1827   %a = load volatile float, float addrspace(1)* %a.gep
1828   %b = load volatile float, float addrspace(1)* %b.gep
1829   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1830   %fneg = fsub float -0.000000e+00, %mul
1831   store volatile float %fneg, float addrspace(1)* %out
1832   store volatile float %mul, float addrspace(1)* %out
; Test: fneg(mul_legacy(a, b)) where the un-negated mul also feeds a second
; mul_legacy by 4.0. The negate is pushed into the first mul's operand and
; the second use compensates by negating its input.
1836 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1837 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1838 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1839 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1840 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1841 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1842 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1843 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1844   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1845   %tid.ext = sext i32 %tid to i64
1846   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1847   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1848   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1849   %a = load volatile float, float addrspace(1)* %a.gep
1850   %b = load volatile float, float addrspace(1)* %b.gep
1851   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1852   %fneg = fsub float -0.000000e+00, %mul
1853   %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1854   store volatile float %fneg, float addrspace(1)* %out
1855   store volatile float %use1, float addrspace(1)* %out
; Test: fneg(mul_legacy(fneg(a), b)) -- the two negations cancel, leaving a
; plain v_mul_legacy with no modifiers.
1859 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1860 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1861 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1862 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1863 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1864 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1865   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1866   %tid.ext = sext i32 %tid to i64
1867   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1868   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1869   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1870   %a = load volatile float, float addrspace(1)* %a.gep
1871   %b = load volatile float, float addrspace(1)* %b.gep
1872   %fneg.a = fsub float -0.000000e+00, %a
1873   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1874   %fneg = fsub float -0.000000e+00, %mul
1875   store volatile float %fneg, float addrspace(1)* %out
; Test: fneg(mul_legacy(a, fneg(b))) -- symmetric to the previous case; the
; negations cancel, leaving a plain v_mul_legacy.
1879 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1880 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1881 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1882 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1883 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1884 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1885   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1886   %tid.ext = sext i32 %tid to i64
1887   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1888   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1889   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1890   %a = load volatile float, float addrspace(1)* %a.gep
1891   %b = load volatile float, float addrspace(1)* %b.gep
1892   %fneg.b = fsub float -0.000000e+00, %b
1893   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1894   %fneg = fsub float -0.000000e+00, %mul
1895   store volatile float %fneg, float addrspace(1)* %out
; Test: fneg(mul_legacy(fneg(a), fneg(b))). The two inner negations cancel
; each other, so the outer negate remains as a single source modifier.
1899 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1900 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1901 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1902 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1903 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1904 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1905   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1906   %tid.ext = sext i32 %tid to i64
1907   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1908   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1909   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1910   %a = load volatile float, float addrspace(1)* %a.gep
1911   %b = load volatile float, float addrspace(1)* %b.gep
1912   %fneg.a = fsub float -0.000000e+00, %a
1913   %fneg.b = fsub float -0.000000e+00, %b
1914   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1915   %fneg = fsub float -0.000000e+00, %mul
1916   store volatile float %fneg, float addrspace(1)* %out
; fneg(a) is also stored directly, so the negate must be materialized
; (v_xor with the sign bit) while the mul still folds both negations away.
1920 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1921 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1922 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1923 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1924 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1925 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1926 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1927 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1928 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1929 %tid.ext = sext i32 %tid to i64
1930 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1931 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1932 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1933 %a = load volatile float, float addrspace(1)* %a.gep
1934 %b = load volatile float, float addrspace(1)* %b.gep
1935 %fneg.a = fsub float -0.000000e+00, %a
1936 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1937 %fneg = fsub float -0.000000e+00, %mul
1938 store volatile float %fneg, float addrspace(1)* %out
1939 store volatile float %fneg.a, float addrspace(1)* %out
; fneg(a) has a second fmul.legacy use (with the scalar kernel arg %c):
; the first mul cancels its negations, the second carries the -[[A]] modifier.
1943 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1944 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1945 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1946 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1947 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1948 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1949 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1950 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1951 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1952 %tid.ext = sext i32 %tid to i64
1953 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1954 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1955 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1956 %a = load volatile float, float addrspace(1)* %a.gep
1957 %b = load volatile float, float addrspace(1)* %b.gep
1958 %fneg.a = fsub float -0.000000e+00, %a
1959 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1960 %fneg = fsub float -0.000000e+00, %mul
1961 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
1962 store volatile float %fneg, float addrspace(1)* %out
1963 store volatile float %use1, float addrspace(1)* %out
1967 ; --------------------------------------------------------------------------------
1969 ; --------------------------------------------------------------------------------
; fneg(llvm.sin(a)): sin is expanded as fract(a * 1/(2*pi)) fed to v_sin; the
; fneg folds into the scale constant (0xbe22f983 is the negated 1/(2*pi)).
1971 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
1972 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1973 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
1974 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1975 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1976 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1977 define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1978 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1979 %tid.ext = sext i32 %tid to i64
1980 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1981 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1982 %a = load volatile float, float addrspace(1)* %a.gep
1983 %sin = call float @llvm.sin.f32(float %a)
1984 %fneg = fsub float -0.000000e+00, %sin
1985 store float %fneg, float addrspace(1)* %out.gep
; fneg(llvm.amdgcn.sin(a)): the negate folds into the v_sin source modifier.
1989 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1990 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1991 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1992 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1993 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1994 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1995 %tid.ext = sext i32 %tid to i64
1996 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1997 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1998 %a = load volatile float, float addrspace(1)* %a.gep
1999 %sin = call float @llvm.amdgcn.sin.f32(float %a)
2000 %fneg = fsub float -0.0, %sin
2001 store float %fneg, float addrspace(1)* %out.gep
2005 ; --------------------------------------------------------------------------------
2007 ; --------------------------------------------------------------------------------
; fneg(trunc(a)): the negate folds into the v_trunc source modifier.
2009 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
2010 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2011 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2012 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2013 define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2014 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2015 %tid.ext = sext i32 %tid to i64
2016 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2017 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2018 %a = load volatile float, float addrspace(1)* %a.gep
2019 %trunc = call float @llvm.trunc.f32(float %a)
2020 %fneg = fsub float -0.0, %trunc
2021 store float %fneg, float addrspace(1)* %out.gep
2025 ; --------------------------------------------------------------------------------
2027 ; --------------------------------------------------------------------------------
; fneg(round(a)): round is expanded (trunc/sub/cndmask + add).  In safe mode
; the fneg stays a sign-bit xor after the add; with -enable-no-signed-zeros
; (NSZ) it folds into the final op as a negated-source v_sub.
2029 ; GCN-LABEL: {{^}}v_fneg_round_f32:
2030 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2031 ; GCN: v_trunc_f32_e32
2032 ; GCN: v_sub_f32_e32
2033 ; GCN: v_cndmask_b32
2035 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
2036 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
2038 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
2039 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2040 define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2041 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2042 %tid.ext = sext i32 %tid to i64
2043 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2044 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2045 %a = load volatile float, float addrspace(1)* %a.gep
2046 %round = call float @llvm.round.f32(float %a)
2047 %fneg = fsub float -0.0, %round
2048 store float %fneg, float addrspace(1)* %out.gep
2052 ; --------------------------------------------------------------------------------
2054 ; --------------------------------------------------------------------------------
; fneg(rint(a)): rint lowers to v_rndne; the negate folds into its source.
2056 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
2057 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2058 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2059 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2060 define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2061 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2062 %tid.ext = sext i32 %tid to i64
2063 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2064 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2065 %a = load volatile float, float addrspace(1)* %a.gep
2066 %rint = call float @llvm.rint.f32(float %a)
2067 %fneg = fsub float -0.0, %rint
2068 store float %fneg, float addrspace(1)* %out.gep
2072 ; --------------------------------------------------------------------------------
2074 ; --------------------------------------------------------------------------------
; fneg(nearbyint(a)): also lowers to v_rndne with the negate as a source
; modifier, same as the rint case above.
2076 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
2077 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2078 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2079 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2080 define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2081 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2082 %tid.ext = sext i32 %tid to i64
2083 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2084 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2085 %a = load volatile float, float addrspace(1)* %a.gep
2086 %nearbyint = call float @llvm.nearbyint.f32(float %a)
2087 %fneg = fsub float -0.0, %nearbyint
2088 store float %fneg, float addrspace(1)* %out.gep
2092 ; --------------------------------------------------------------------------------
2093 ; fcanonicalize tests
2094 ; --------------------------------------------------------------------------------
; fneg(canonicalize(a)): canonicalize is implemented as a multiply by 1.0,
; so the fneg folds into the constant, giving v_mul by -1.0.
2096 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
2097 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2098 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
2099 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2100 define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2101 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2102 %tid.ext = sext i32 %tid to i64
2103 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2104 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2105 %a = load volatile float, float addrspace(1)* %a.gep
2106 %trunc = call float @llvm.canonicalize.f32(float %a)
2107 %fneg = fsub float -0.0, %trunc
2108 store float %fneg, float addrspace(1)* %out.gep
2112 ; --------------------------------------------------------------------------------
2114 ; --------------------------------------------------------------------------------
; fneg feeding two v_interp_p1 uses: the interp can't take a modifier, so the
; negate is pushed back into the producing fmul (-[[B]]) and the result reused.
2116 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
2117 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2118 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2119 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2120 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2121 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2122 define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2123 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2124 %tid.ext = sext i32 %tid to i64
2125 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2126 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2127 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2128 %a = load volatile float, float addrspace(1)* %a.gep
2129 %b = load volatile float, float addrspace(1)* %b.gep
2130 %mul = fmul float %a, %b
2131 %fneg = fsub float -0.0, %mul
2132 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2133 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2134 store volatile float %intrp0, float addrspace(1)* %out.gep
2135 store volatile float %intrp1, float addrspace(1)* %out.gep
; Same as the p1 case, but the negated mul feeds the second operand of two
; v_interp_p2 uses.
2139 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
2140 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2141 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2142 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2143 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2144 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2145 define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2146 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2147 %tid.ext = sext i32 %tid to i64
2148 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2149 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2150 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2151 %a = load volatile float, float addrspace(1)* %a.gep
2152 %b = load volatile float, float addrspace(1)* %b.gep
2153 %mul = fmul float %a, %b
2154 %fneg = fsub float -0.0, %mul
2155 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2156 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2157 store volatile float %intrp0, float addrspace(1)* %out.gep
2158 store volatile float %intrp1, float addrspace(1)* %out.gep
2162 ; --------------------------------------------------------------------------------
2164 ; --------------------------------------------------------------------------------
; fneg used across a basic-block boundary (through CopyToReg): the unnegated
; mul is stored on the fall-through path, and the negate is materialized as a
; sign-bit xor only inside the taken branch.
; NOTE(review): the "if:"/"endif:" label lines of this function are not
; visible in this extract; only the branch and both block bodies are.
2166 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2167 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2168 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2169 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2170 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2171 ; GCN: s_cbranch_scc0
2173 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2176 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2177 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2178 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2180 define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2181 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2182 %tid.ext = sext i32 %tid to i64
2183 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2184 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2185 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2186 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2187 %a = load volatile float, float addrspace(1)* %a.gep
2188 %b = load volatile float, float addrspace(1)* %b.gep
2189 %c = load volatile float, float addrspace(1)* %c.gep
2190 %mul = fmul float %a, %b
2191 %fneg = fsub float -0.0, %mul
2192 %cmp0 = icmp eq i32 %d, 0
2193 br i1 %cmp0, label %if, label %endif
2196 %mul1 = fmul float %fneg, %c
2197 store volatile float %mul1, float addrspace(1)* %out.gep
2201 store volatile float %mul, float addrspace(1)* %out.gep
2205 ; --------------------------------------------------------------------------------
2207 ; --------------------------------------------------------------------------------
; fneg consumed by inline asm (no modifier possible on the use), so the
; negate is folded into the producing fmul instead.
2209 ; Can't fold into use, so should fold into source
2210 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2211 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2212 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2213 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2214 ; GCN: ; use [[MUL]]
2215 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2216 define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2217 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2218 %tid.ext = sext i32 %tid to i64
2219 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2220 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2221 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2222 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2223 %a = load volatile float, float addrspace(1)* %a.gep
2224 %b = load volatile float, float addrspace(1)* %b.gep
2225 %c = load volatile float, float addrspace(1)* %c.gep
2226 %mul = fmul float %a, %b
2227 %fneg = fsub float -0.0, %mul
2228 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2229 store volatile float %fneg, float addrspace(1)* %out.gep
2233 ; --------------------------------------------------------------------------------
2235 ; --------------------------------------------------------------------------------
; Like the previous test, but the unnegated mul result is also stored, so the
; fneg cannot be folded into the source and is materialized as a sign-bit xor.
2237 ; Can't fold into use, so should fold into source
2238 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2239 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2240 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2241 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2242 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2243 ; GCN: ; use [[NEG]]
2244 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2245 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2246 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2247 %tid.ext = sext i32 %tid to i64
2248 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2249 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2250 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2251 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2252 %a = load volatile float, float addrspace(1)* %a.gep
2253 %b = load volatile float, float addrspace(1)* %b.gep
2254 %c = load volatile float, float addrspace(1)* %c.gep
2255 %mul = fmul float %a, %b
2256 %fneg = fsub float -0.0, %mul
2257 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2258 store volatile float %mul, float addrspace(1)* %out.gep
2262 ; --------------------------------------------------------------------------------
2263 ; code size regression tests
2264 ; --------------------------------------------------------------------------------
; Code-size test: both users of fneg(a) are VOP3 fmas, which carry source
; modifiers for free, so -[[A]] appears in both with no extra instruction.
2266 ; There are multiple users of the fneg that must use a VOP3
2267 ; instruction, so there is no penalty
2268 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2269 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2270 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2271 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2273 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2274 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2276 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2277 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
2278 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2279 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2280 %tid.ext = sext i32 %tid to i64
2281 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2282 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2283 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2284 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2285 %a = load volatile float, float addrspace(1)* %a.gep
2286 %b = load volatile float, float addrspace(1)* %b.gep
2287 %c = load volatile float, float addrspace(1)* %c.gep
2289 %fneg.a = fsub float -0.0, %a
2290 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2291 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2293 store volatile float %fma0, float addrspace(1)* %out
2294 store volatile float %fma1, float addrspace(1)* %out
; Code-size test: both users are plain fmuls; carrying the -[[A]] modifier
; forces each into the larger VOP3 (_e64) encoding.
2298 ; There are multiple users, but both require using a larger encoding
2301 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2302 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2303 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2304 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2306 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2307 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2308 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2309 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2310 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2311 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2312 %tid.ext = sext i32 %tid to i64
2313 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2314 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2315 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2316 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2317 %a = load volatile float, float addrspace(1)* %a.gep
2318 %b = load volatile float, float addrspace(1)* %b.gep
2319 %c = load volatile float, float addrspace(1)* %c.gep
2321 %fneg.a = fsub float -0.0, %a
2322 %mul0 = fmul float %fneg.a, %b
2323 %mul1 = fmul float %fneg.a, %c
2325 store volatile float %mul0, float addrspace(1)* %out
2326 store volatile float %mul1, float addrspace(1)* %out
; Mixed-cost test: one user is a VOP3 fma (free modifier), the other a VOP2
; mul that must grow to _e64 to carry -[[A]].
2330 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2331 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2332 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2333 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2334 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2336 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2337 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2339 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2340 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2341 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2342 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2343 %tid.ext = sext i32 %tid to i64
2344 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2345 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2346 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2347 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2348 %a = load volatile float, float addrspace(1)* %a.gep
2349 %b = load volatile float, float addrspace(1)* %b.gep
2350 %c = load volatile float, float addrspace(1)* %c.gep
2352 %fneg.a = fsub float -0.0, %a
2353 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2354 %mul1 = fmul float %fneg.a, %c
2356 store volatile float %fma0, float addrspace(1)* %out
2357 store volatile float %mul1, float addrspace(1)* %out
; fneg of an fma result with two mul users.  Safe mode keeps the negate on
; each user (-[[FMA0]] in VOP3 muls); NSZ instead folds it back into the fma
; source for free (negating the multiplicand and the +2.0 addend), leaving
; both muls in the short VOP2 encoding.
2361 ; The use of the fneg requires a code size increase, but folding into
2362 ; the source does not
2364 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2365 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2366 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2367 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2368 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2370 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2371 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2372 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2374 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2375 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2376 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2378 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2379 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
2380 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2381 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2382 %tid.ext = sext i32 %tid to i64
2383 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2384 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2385 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2386 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2387 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2388 %a = load volatile float, float addrspace(1)* %a.gep
2389 %b = load volatile float, float addrspace(1)* %b.gep
2390 %c = load volatile float, float addrspace(1)* %c.gep
2391 %d = load volatile float, float addrspace(1)* %d.gep
2393 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2394 %fneg.fma0 = fsub float -0.0, %fma0
2395 %mul1 = fmul float %fneg.fma0, %c
2396 %mul2 = fmul float %fneg.fma0, %d
2398 store volatile float %mul1, float addrspace(1)* %out
2399 store volatile float %mul2, float addrspace(1)* %out
; f64 variant of the previous test: v_mul_f64 is always VOP3, so carrying
; -[[FMA0]] on both users costs nothing and no mode split is needed.
2403 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2404 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2405 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2406 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2407 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2409 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2410 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2411 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2413 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2414 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2415 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
2416 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2417 %tid.ext = sext i32 %tid to i64
2418 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2419 %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2420 %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2421 %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2422 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2423 %a = load volatile double, double addrspace(1)* %a.gep
2424 %b = load volatile double, double addrspace(1)* %b.gep
2425 %c = load volatile double, double addrspace(1)* %c.gep
2426 %d = load volatile double, double addrspace(1)* %d.gep
2428 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2429 %fneg.fma0 = fsub double -0.0, %fma0
2430 %mul1 = fmul double %fneg.fma0, %c
2431 %mul2 = fmul double %fneg.fma0, %d
2433 store volatile double %mul1, double addrspace(1)* %out
2434 store volatile double %mul2, double addrspace(1)* %out
; fneg of a trunc result with a single fma user: folding into v_trunc would
; need the larger encoding, so the negate folds into the fma's -[[TRUNC_A]]
; source modifier for free instead.
2438 ; %trunc.a has one fneg use, but it requires a code size increase and
2439 ; %the fneg can instead be folded for free into the fma.
2441 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2442 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2443 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2444 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2445 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2446 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2447 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2448 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2449 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2450 %tid.ext = sext i32 %tid to i64
2451 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2452 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2453 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2454 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2455 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2456 %a = load volatile float, float addrspace(1)* %a.gep
2457 %b = load volatile float, float addrspace(1)* %b.gep
2458 %c = load volatile float, float addrspace(1)* %c.gep
2459 %d = load volatile float, float addrspace(1)* %d.gep
2461 %trunc.a = call float @llvm.trunc.f32(float %a)
2462 %trunc.fneg.a = fsub float -0.0, %trunc.a
2463 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2464 store volatile float %fma0, float addrspace(1)* %out
; Like the previous test, but trunc.a also has an unnegated use (%mul1), so
; the fneg can only become the fma's source modifier while the plain value
; stays live for the VOP2 mul.
2468 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2469 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2470 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2471 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2472 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2473 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2474 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2475 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2476 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2477 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2478 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2479 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2480 %tid.ext = sext i32 %tid to i64
2481 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2482 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2483 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2484 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2485 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2486 %a = load volatile float, float addrspace(1)* %a.gep
2487 %b = load volatile float, float addrspace(1)* %b.gep
2488 %c = load volatile float, float addrspace(1)* %c.gep
2489 %d = load volatile float, float addrspace(1)* %d.gep
2491 %trunc.a = call float @llvm.trunc.f32(float %a)
2492 %trunc.fneg.a = fsub float -0.0, %trunc.a
2493 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2494 %mul1 = fmul float %trunc.a, %d
2495 store volatile float %fma0, float addrspace(1)* %out
2496 store volatile float %mul1, float addrspace(1)* %out
; Declarations of the generic and AMDGCN intrinsics used by the tests above.
; #0 = nounwind, #1 = nounwind readnone (see attribute lines at the end).
2500 declare i32 @llvm.amdgcn.workitem.id.x() #1
2501 declare float @llvm.fma.f32(float, float, float) #1
2502 declare float @llvm.fmuladd.f32(float, float, float) #1
2503 declare float @llvm.sin.f32(float) #1
2504 declare float @llvm.trunc.f32(float) #1
2505 declare float @llvm.round.f32(float) #1
2506 declare float @llvm.rint.f32(float) #1
2507 declare float @llvm.nearbyint.f32(float) #1
2508 declare float @llvm.canonicalize.f32(float) #1
2509 declare float @llvm.minnum.f32(float, float) #1
2510 declare float @llvm.maxnum.f32(float, float) #1
2511 declare half @llvm.minnum.f16(half, half) #1
2512 declare double @llvm.minnum.f64(double, double) #1
2513 declare double @llvm.fma.f64(double, double, double) #1
2515 declare float @llvm.amdgcn.sin.f32(float) #1
2516 declare float @llvm.amdgcn.rcp.f32(float) #1
2517 declare float @llvm.amdgcn.rcp.legacy(float) #1
2518 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2519 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2520 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2522 attributes #0 = { nounwind }
2523 attributes #1 = { nounwind readnone }