1 ; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
5 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
7 ; --------------------------------------------------------------------------------
9 ; --------------------------------------------------------------------------------
11 ; GCN-LABEL: {{^}}v_fneg_add_f32:
12 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
15 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
16 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
18 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
19 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
20 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
21 %tid = call i32 @llvm.amdgcn.workitem.id.x()
22 %tid.ext = sext i32 %tid to i64
23 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
24 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
25 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
26 %a = load volatile float, float addrspace(1)* %a.gep
27 %b = load volatile float, float addrspace(1)* %b.gep
28 %add = fadd float %a, %b
29 %fneg = fsub float -0.000000e+00, %add
30 store float %fneg, float addrspace(1)* %out.gep
34 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
35 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
36 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
37 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
38 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
40 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
41 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
42 %tid = call i32 @llvm.amdgcn.workitem.id.x()
43 %tid.ext = sext i32 %tid to i64
44 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
45 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
46 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
47 %a = load volatile float, float addrspace(1)* %a.gep
48 %b = load volatile float, float addrspace(1)* %b.gep
49 %add = fadd float %a, %b
50 %fneg = fsub float -0.000000e+00, %add
51 store volatile float %fneg, float addrspace(1)* %out
52 store volatile float %add, float addrspace(1)* %out
56 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
57 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
58 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
60 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
61 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
62 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
64 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
65 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
67 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
68 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
69 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
70 %tid = call i32 @llvm.amdgcn.workitem.id.x()
71 %tid.ext = sext i32 %tid to i64
72 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
73 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
74 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
75 %a = load volatile float, float addrspace(1)* %a.gep
76 %b = load volatile float, float addrspace(1)* %b.gep
77 %add = fadd float %a, %b
78 %fneg = fsub float -0.000000e+00, %add
79 %use1 = fmul float %add, 4.0
80 store volatile float %fneg, float addrspace(1)* %out
81 store volatile float %use1, float addrspace(1)* %out
85 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
86 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
87 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
89 ; GCN-SAFE: v_sub_f32_e32
90 ; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
92 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
94 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
95 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
96 %tid = call i32 @llvm.amdgcn.workitem.id.x()
97 %tid.ext = sext i32 %tid to i64
98 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
99 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
100 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
101 %a = load volatile float, float addrspace(1)* %a.gep
102 %b = load volatile float, float addrspace(1)* %b.gep
103 %fneg.a = fsub float -0.000000e+00, %a
104 %add = fadd float %fneg.a, %b
105 %fneg = fsub float -0.000000e+00, %add
106 store volatile float %fneg, float addrspace(1)* %out
110 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
111 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
112 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
114 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
115 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
117 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
118 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
119 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
120 %tid = call i32 @llvm.amdgcn.workitem.id.x()
121 %tid.ext = sext i32 %tid to i64
122 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
123 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
124 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
125 %a = load volatile float, float addrspace(1)* %a.gep
126 %b = load volatile float, float addrspace(1)* %b.gep
127 %fneg.b = fsub float -0.000000e+00, %b
128 %add = fadd float %a, %fneg.b
129 %fneg = fsub float -0.000000e+00, %add
130 store volatile float %fneg, float addrspace(1)* %out
134 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
136 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
138 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
139 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
141 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
142 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
143 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145 %tid.ext = sext i32 %tid to i64
146 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
147 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
148 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
149 %a = load volatile float, float addrspace(1)* %a.gep
150 %b = load volatile float, float addrspace(1)* %b.gep
151 %fneg.a = fsub float -0.000000e+00, %a
152 %fneg.b = fsub float -0.000000e+00, %b
153 %add = fadd float %fneg.a, %fneg.b
154 %fneg = fsub float -0.000000e+00, %add
155 store volatile float %fneg, float addrspace(1)* %out
159 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
160 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
161 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
165 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
166 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
168 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
169 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
170 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
171 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
172 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
174 %tid.ext = sext i32 %tid to i64
175 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
176 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
177 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
178 %a = load volatile float, float addrspace(1)* %a.gep
179 %b = load volatile float, float addrspace(1)* %b.gep
180 %fneg.a = fsub float -0.000000e+00, %a
181 %add = fadd float %fneg.a, %b
182 %fneg = fsub float -0.000000e+00, %add
183 store volatile float %fneg, float addrspace(1)* %out
184 store volatile float %fneg.a, float addrspace(1)* %out
188 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
192 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
193 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
194 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
196 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
197 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
198 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
199 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
200 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
202 %tid.ext = sext i32 %tid to i64
203 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
204 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
205 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
206 %a = load volatile float, float addrspace(1)* %a.gep
207 %b = load volatile float, float addrspace(1)* %b.gep
208 %fneg.a = fsub float -0.000000e+00, %a
209 %add = fadd float %fneg.a, %b
210 %fneg = fsub float -0.000000e+00, %add
211 %use1 = fmul float %fneg.a, %c
212 store volatile float %fneg, float addrspace(1)* %out
213 store volatile float %use1, float addrspace(1)* %out
217 ; --------------------------------------------------------------------------------
219 ; --------------------------------------------------------------------------------
221 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
222 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
223 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
224 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
225 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
226 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
227 %tid = call i32 @llvm.amdgcn.workitem.id.x()
228 %tid.ext = sext i32 %tid to i64
229 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
230 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
231 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
232 %a = load volatile float, float addrspace(1)* %a.gep
233 %b = load volatile float, float addrspace(1)* %b.gep
234 %mul = fmul float %a, %b
235 %fneg = fsub float -0.000000e+00, %mul
236 store float %fneg, float addrspace(1)* %out.gep
240 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
241 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
242 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
243 ; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
244 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
245 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
246 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
247 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
248 %tid = call i32 @llvm.amdgcn.workitem.id.x()
249 %tid.ext = sext i32 %tid to i64
250 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
251 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
252 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
253 %a = load volatile float, float addrspace(1)* %a.gep
254 %b = load volatile float, float addrspace(1)* %b.gep
255 %mul = fmul float %a, %b
256 %fneg = fsub float -0.000000e+00, %mul
257 store volatile float %fneg, float addrspace(1)* %out
258 store volatile float %mul, float addrspace(1)* %out
262 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
263 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
264 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
265 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
266 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
268 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
269 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
270 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
271 %tid = call i32 @llvm.amdgcn.workitem.id.x()
272 %tid.ext = sext i32 %tid to i64
273 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
274 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
275 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
276 %a = load volatile float, float addrspace(1)* %a.gep
277 %b = load volatile float, float addrspace(1)* %b.gep
278 %mul = fmul float %a, %b
279 %fneg = fsub float -0.000000e+00, %mul
280 %use1 = fmul float %mul, 4.0
281 store volatile float %fneg, float addrspace(1)* %out
282 store volatile float %use1, float addrspace(1)* %out
286 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
287 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
288 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
289 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
290 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
291 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
292 %tid = call i32 @llvm.amdgcn.workitem.id.x()
293 %tid.ext = sext i32 %tid to i64
294 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
295 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
296 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
297 %a = load volatile float, float addrspace(1)* %a.gep
298 %b = load volatile float, float addrspace(1)* %b.gep
299 %fneg.a = fsub float -0.000000e+00, %a
300 %mul = fmul float %fneg.a, %b
301 %fneg = fsub float -0.000000e+00, %mul
302 store volatile float %fneg, float addrspace(1)* %out
306 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
307 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
308 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
309 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
310 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
311 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
312 %tid = call i32 @llvm.amdgcn.workitem.id.x()
313 %tid.ext = sext i32 %tid to i64
314 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
315 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
316 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
317 %a = load volatile float, float addrspace(1)* %a.gep
318 %b = load volatile float, float addrspace(1)* %b.gep
319 %fneg.b = fsub float -0.000000e+00, %b
320 %mul = fmul float %a, %fneg.b
321 %fneg = fsub float -0.000000e+00, %mul
322 store volatile float %fneg, float addrspace(1)* %out
326 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
327 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
328 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
329 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
330 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
331 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
332 %tid = call i32 @llvm.amdgcn.workitem.id.x()
333 %tid.ext = sext i32 %tid to i64
334 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
335 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
336 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
337 %a = load volatile float, float addrspace(1)* %a.gep
338 %b = load volatile float, float addrspace(1)* %b.gep
339 %fneg.a = fsub float -0.000000e+00, %a
340 %fneg.b = fsub float -0.000000e+00, %b
341 %mul = fmul float %fneg.a, %fneg.b
342 %fneg = fsub float -0.000000e+00, %mul
343 store volatile float %fneg, float addrspace(1)* %out
347 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
348 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
349 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
350 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
351 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
353 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
354 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
355 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
356 %tid = call i32 @llvm.amdgcn.workitem.id.x()
357 %tid.ext = sext i32 %tid to i64
358 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
359 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
360 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
361 %a = load volatile float, float addrspace(1)* %a.gep
362 %b = load volatile float, float addrspace(1)* %b.gep
363 %fneg.a = fsub float -0.000000e+00, %a
364 %mul = fmul float %fneg.a, %b
365 %fneg = fsub float -0.000000e+00, %mul
366 store volatile float %fneg, float addrspace(1)* %out
367 store volatile float %fneg.a, float addrspace(1)* %out
371 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
372 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
373 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
374 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
375 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
376 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
377 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
378 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
379 %tid = call i32 @llvm.amdgcn.workitem.id.x()
380 %tid.ext = sext i32 %tid to i64
381 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
382 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
383 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
384 %a = load volatile float, float addrspace(1)* %a.gep
385 %b = load volatile float, float addrspace(1)* %b.gep
386 %fneg.a = fsub float -0.000000e+00, %a
387 %mul = fmul float %fneg.a, %b
388 %fneg = fsub float -0.000000e+00, %mul
389 %use1 = fmul float %fneg.a, %c
390 store volatile float %fneg, float addrspace(1)* %out
391 store volatile float %use1, float addrspace(1)* %out
395 ; --------------------------------------------------------------------------------
397 ; --------------------------------------------------------------------------------
399 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
400 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
401 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
402 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
403 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
404 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
405 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
406 define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
407 %tid = call i32 @llvm.amdgcn.workitem.id.x()
408 %tid.ext = sext i32 %tid to i64
409 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
410 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
411 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
412 %a = load volatile float, float addrspace(1)* %a.gep
413 %b = load volatile float, float addrspace(1)* %b.gep
414 %min = call float @llvm.minnum.f32(float %a, float %b)
415 %fneg = fsub float -0.000000e+00, %min
416 store float %fneg, float addrspace(1)* %out.gep
420 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
423 ; GCN: v_max_f32_e64 v0, -v0, -v1
425 define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
426 %min = call float @llvm.minnum.f32(float %a, float %b)
427 %fneg = fsub float -0.000000e+00, %min
431 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
432 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
433 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
434 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
435 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
436 define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
437 %tid = call i32 @llvm.amdgcn.workitem.id.x()
438 %tid.ext = sext i32 %tid to i64
439 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
440 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
441 %a = load volatile float, float addrspace(1)* %a.gep
442 %min = call float @llvm.minnum.f32(float %a, float %a)
443 %min.fneg = fsub float -0.0, %min
444 store float %min.fneg, float addrspace(1)* %out.gep
448 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
450 ; GCN: v_max_f32_e64 v0, -v0, -v0
452 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
453 %min = call float @llvm.minnum.f32(float %a, float %a)
454 %min.fneg = fsub float -0.0, %min
458 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
459 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
460 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
461 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
462 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
463 define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
464 %tid = call i32 @llvm.amdgcn.workitem.id.x()
465 %tid.ext = sext i32 %tid to i64
466 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
467 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
468 %a = load volatile float, float addrspace(1)* %a.gep
469 %min = call float @llvm.minnum.f32(float 4.0, float %a)
470 %fneg = fsub float -0.000000e+00, %min
471 store float %fneg, float addrspace(1)* %out.gep
475 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
477 ; GCN: v_max_f32_e64 v0, -v0, -4.0
479 define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
480 %min = call float @llvm.minnum.f32(float 4.0, float %a)
481 %fneg = fsub float -0.000000e+00, %min
485 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
486 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
487 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
488 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
489 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
490 define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
491 %tid = call i32 @llvm.amdgcn.workitem.id.x()
492 %tid.ext = sext i32 %tid to i64
493 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
494 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
495 %a = load volatile float, float addrspace(1)* %a.gep
496 %min = call float @llvm.minnum.f32(float -4.0, float %a)
497 %fneg = fsub float -0.000000e+00, %min
498 store float %fneg, float addrspace(1)* %out.gep
502 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
504 ; GCN: v_max_f32_e64 v0, -v0, 4.0
506 define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
507 %min = call float @llvm.minnum.f32(float -4.0, float %a)
508 %fneg = fsub float -0.000000e+00, %min
512 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
513 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
514 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
515 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
516 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
517 %tid = call i32 @llvm.amdgcn.workitem.id.x()
518 %tid.ext = sext i32 %tid to i64
519 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
520 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
521 %a = load volatile float, float addrspace(1)* %a.gep
522 %min = call float @llvm.minnum.f32(float 0.0, float %a)
523 %fneg = fsub float -0.000000e+00, %min
524 store float %fneg, float addrspace(1)* %out.gep
528 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
529 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
530 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
531 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
532 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
533 define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
534 %tid = call i32 @llvm.amdgcn.workitem.id.x()
535 %tid.ext = sext i32 %tid to i64
536 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
537 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
538 %a = load volatile float, float addrspace(1)* %a.gep
539 %min = call float @llvm.minnum.f32(float -0.0, float %a)
540 %fneg = fsub float -0.000000e+00, %min
541 store float %fneg, float addrspace(1)* %out.gep
545 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
546 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
548 ; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
549 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
551 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
552 ; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
553 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
555 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
556 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
557 %tid = call i32 @llvm.amdgcn.workitem.id.x()
558 %tid.ext = sext i32 %tid to i64
559 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
560 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
561 %a = load volatile float, float addrspace(1)* %a.gep
562 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
563 %fneg = fsub float -0.000000e+00, %min
564 store float %fneg, float addrspace(1)* %out.gep
568 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
569 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
571 ; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
572 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
574 ; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
575 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
577 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
578 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
579 %tid = call i32 @llvm.amdgcn.workitem.id.x()
580 %tid.ext = sext i32 %tid to i64
581 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
582 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
583 %a = load volatile float, float addrspace(1)* %a.gep
584 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
585 %fneg = fsub float -0.000000e+00, %min
586 store float %fneg, float addrspace(1)* %out.gep
590 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
591 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
593 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
594 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
595 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
597 ; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
598 ; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
599 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
601 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
602 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
603 %tid = call i32 @llvm.amdgcn.workitem.id.x()
604 %tid.ext = sext i32 %tid to i64
605 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
606 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
607 %a = load volatile half, half addrspace(1)* %a.gep
608 %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
609 %fneg = fsub half -0.000000e+00, %min
610 store half %fneg, half addrspace(1)* %out.gep
614 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
615 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
617 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
618 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
619 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
621 ; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
622 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
624 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
625 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
626 %tid = call i32 @llvm.amdgcn.workitem.id.x()
627 %tid.ext = sext i32 %tid to i64
628 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
629 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
630 %a = load volatile half, half addrspace(1)* %a.gep
631 %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
632 %fneg = fsub half -0.000000e+00, %min
633 store half %fneg, half addrspace(1)* %out.gep
637 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
638 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
640 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
641 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
642 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
643 ; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
645 ; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
646 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
648 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
649 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
650 %tid = call i32 @llvm.amdgcn.workitem.id.x()
651 %tid.ext = sext i32 %tid to i64
652 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
653 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
654 %a = load volatile double, double addrspace(1)* %a.gep
655 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
656 %fneg = fsub double -0.000000e+00, %min
657 store double %fneg, double addrspace(1)* %out.gep
661 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
662 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
664 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
665 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
666 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
667 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
669 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
670 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
672 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
673 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
674 %tid = call i32 @llvm.amdgcn.workitem.id.x()
675 %tid.ext = sext i32 %tid to i64
676 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
677 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
678 %a = load volatile double, double addrspace(1)* %a.gep
679 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
680 %fneg = fsub double -0.000000e+00, %min
681 store double %fneg, double addrspace(1)* %out.gep
685 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
687 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
689 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
690 %min = call float @llvm.minnum.f32(float -0.0, float %a)
691 %fneg = fsub float -0.000000e+00, %min
695 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
697 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
698 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
699 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
700 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
701 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
702 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
703 %tid = call i32 @llvm.amdgcn.workitem.id.x()
704 %tid.ext = sext i32 %tid to i64
705 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
706 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
707 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
708 %a = load volatile float, float addrspace(1)* %a.gep
709 %b = load volatile float, float addrspace(1)* %b.gep
710 %min = call float @llvm.minnum.f32(float 0.0, float %a)
711 %fneg = fsub float -0.000000e+00, %min
712 %mul = fmul float %fneg, %b
713 store float %mul, float addrspace(1)* %out.gep
717 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
718 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
719 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
721 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
723 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
724 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
726 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
727 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
728 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
730 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
731 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
732 %tid = call i32 @llvm.amdgcn.workitem.id.x()
733 %tid.ext = sext i32 %tid to i64
734 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
735 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
736 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
737 %a = load volatile float, float addrspace(1)* %a.gep
738 %b = load volatile float, float addrspace(1)* %b.gep
739 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
740 %fneg = fsub float -0.000000e+00, %min
741 %mul = fmul float %fneg, %b
742 store float %mul, float addrspace(1)* %out.gep
746 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
749 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
750 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
752 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
753 %min = call float @llvm.minnum.f32(float 0.0, float %a)
754 %fneg = fsub float -0.000000e+00, %min
755 %mul = fmul float %fneg, %b
759 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
760 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
761 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
762 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
763 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
764 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
765 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
766 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
767 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
768 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
769 %tid = call i32 @llvm.amdgcn.workitem.id.x()
770 %tid.ext = sext i32 %tid to i64
771 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
772 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
773 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
774 %a = load volatile float, float addrspace(1)* %a.gep
775 %b = load volatile float, float addrspace(1)* %b.gep
776 %min = call float @llvm.minnum.f32(float %a, float %b)
777 %fneg = fsub float -0.000000e+00, %min
778 %use1 = fmul float %min, 4.0
779 store volatile float %fneg, float addrspace(1)* %out
780 store volatile float %use1, float addrspace(1)* %out
784 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
787 ; GCN: v_max_f32_e64 v0, -v0, -v1
788 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
790 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
791 %min = call float @llvm.minnum.f32(float %a, float %b)
792 %fneg = fsub float -0.000000e+00, %min
793 %use1 = fmul float %min, 4.0
794 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
795 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
796 ret <2 x float> %ins1
799 ; --------------------------------------------------------------------------------
801 ; --------------------------------------------------------------------------------
804 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
805 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
806 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
807 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
808 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
809 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
810 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
811 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
812 %tid = call i32 @llvm.amdgcn.workitem.id.x()
813 %tid.ext = sext i32 %tid to i64
814 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
815 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
816 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
817 %a = load volatile float, float addrspace(1)* %a.gep
818 %b = load volatile float, float addrspace(1)* %b.gep
819 %max = call float @llvm.maxnum.f32(float %a, float %b)
820 %fneg = fsub float -0.000000e+00, %max
821 store float %fneg, float addrspace(1)* %out.gep
825 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
828 ; GCN: v_min_f32_e64 v0, -v0, -v1
830 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
831 %max = call float @llvm.maxnum.f32(float %a, float %b)
832 %fneg = fsub float -0.000000e+00, %max
836 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
837 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
838 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
839 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
840 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
841 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
842 %tid = call i32 @llvm.amdgcn.workitem.id.x()
843 %tid.ext = sext i32 %tid to i64
844 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
845 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
846 %a = load volatile float, float addrspace(1)* %a.gep
847 %max = call float @llvm.maxnum.f32(float %a, float %a)
848 %max.fneg = fsub float -0.0, %max
849 store float %max.fneg, float addrspace(1)* %out.gep
853 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
855 ; GCN: v_min_f32_e64 v0, -v0, -v0
857 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
858 %max = call float @llvm.maxnum.f32(float %a, float %a)
859 %max.fneg = fsub float -0.0, %max
863 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
864 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
865 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
866 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
867 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
868 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
869 %tid = call i32 @llvm.amdgcn.workitem.id.x()
870 %tid.ext = sext i32 %tid to i64
871 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
872 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
873 %a = load volatile float, float addrspace(1)* %a.gep
874 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
875 %fneg = fsub float -0.000000e+00, %max
876 store float %fneg, float addrspace(1)* %out.gep
880 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
882 ; GCN: v_min_f32_e64 v0, -v0, -4.0
884 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
885 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
886 %fneg = fsub float -0.000000e+00, %max
890 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
891 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
892 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
893 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
894 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
895 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
896 %tid = call i32 @llvm.amdgcn.workitem.id.x()
897 %tid.ext = sext i32 %tid to i64
898 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
899 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
900 %a = load volatile float, float addrspace(1)* %a.gep
901 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
902 %fneg = fsub float -0.000000e+00, %max
903 store float %fneg, float addrspace(1)* %out.gep
907 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
909 ; GCN: v_min_f32_e64 v0, -v0, 4.0
911 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
912 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
913 %fneg = fsub float -0.000000e+00, %max
917 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
918 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
919 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
920 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
921 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
922 %tid = call i32 @llvm.amdgcn.workitem.id.x()
923 %tid.ext = sext i32 %tid to i64
924 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
925 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
926 %a = load volatile float, float addrspace(1)* %a.gep
927 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
928 %fneg = fsub float -0.000000e+00, %max
929 store float %fneg, float addrspace(1)* %out.gep
933 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
934 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
935 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
936 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
937 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
938 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
939 %tid = call i32 @llvm.amdgcn.workitem.id.x()
940 %tid.ext = sext i32 %tid to i64
941 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
942 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
943 %a = load volatile float, float addrspace(1)* %a.gep
944 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
945 %fneg = fsub float -0.000000e+00, %max
946 store float %fneg, float addrspace(1)* %out.gep
950 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
952 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
954 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
955 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
956 %fneg = fsub float -0.000000e+00, %max
960 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
961 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
962 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
963 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
964 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
965 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
966 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
967 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
968 %tid = call i32 @llvm.amdgcn.workitem.id.x()
969 %tid.ext = sext i32 %tid to i64
970 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
971 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
972 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
973 %a = load volatile float, float addrspace(1)* %a.gep
974 %b = load volatile float, float addrspace(1)* %b.gep
975 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
976 %fneg = fsub float -0.000000e+00, %max
977 %mul = fmul float %fneg, %b
978 store float %mul, float addrspace(1)* %out.gep
982 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
985 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
986 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
988 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
989 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
990 %fneg = fsub float -0.000000e+00, %max
991 %mul = fmul float %fneg, %b
995 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
996 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
997 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
998 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
999 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1000 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1001 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1002 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1003 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1004 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1005 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1006 %tid.ext = sext i32 %tid to i64
1007 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1008 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1009 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1010 %a = load volatile float, float addrspace(1)* %a.gep
1011 %b = load volatile float, float addrspace(1)* %b.gep
1012 %max = call float @llvm.maxnum.f32(float %a, float %b)
1013 %fneg = fsub float -0.000000e+00, %max
1014 %use1 = fmul float %max, 4.0
1015 store volatile float %fneg, float addrspace(1)* %out
1016 store volatile float %use1, float addrspace(1)* %out
1020 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1023 ; GCN: v_min_f32_e64 v0, -v0, -v1
1024 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1025 ; GCN-NEXT: ; return
1026 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1027 %max = call float @llvm.maxnum.f32(float %a, float %b)
1028 %fneg = fsub float -0.000000e+00, %max
1029 %use1 = fmul float %max, 4.0
1030 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1031 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1032 ret <2 x float> %ins1
1035 ; --------------------------------------------------------------------------------
1037 ; --------------------------------------------------------------------------------
1039 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1040 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1041 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1042 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1044 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1045 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1047 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1048 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1049 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1050 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1051 %tid.ext = sext i32 %tid to i64
1052 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1053 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1054 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1055 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1056 %a = load volatile float, float addrspace(1)* %a.gep
1057 %b = load volatile float, float addrspace(1)* %b.gep
1058 %c = load volatile float, float addrspace(1)* %c.gep
1059 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1060 %fneg = fsub float -0.000000e+00, %fma
1061 store float %fneg, float addrspace(1)* %out.gep
1065 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1066 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1067 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1068 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1069 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1070 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1071 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1072 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1073 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1074 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1075 %tid.ext = sext i32 %tid to i64
1076 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1077 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1078 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1079 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1080 %a = load volatile float, float addrspace(1)* %a.gep
1081 %b = load volatile float, float addrspace(1)* %b.gep
1082 %c = load volatile float, float addrspace(1)* %c.gep
1083 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1084 %fneg = fsub float -0.000000e+00, %fma
1085 store volatile float %fneg, float addrspace(1)* %out
1086 store volatile float %fma, float addrspace(1)* %out
1090 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1091 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1092 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1093 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1095 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1096 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1097 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1099 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1100 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1102 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1103 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1104 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1105 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1106 %tid.ext = sext i32 %tid to i64
1107 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1108 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1109 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1110 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1111 %a = load volatile float, float addrspace(1)* %a.gep
1112 %b = load volatile float, float addrspace(1)* %b.gep
1113 %c = load volatile float, float addrspace(1)* %c.gep
1114 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1115 %fneg = fsub float -0.000000e+00, %fma
1116 %use1 = fmul float %fma, 4.0
1117 store volatile float %fneg, float addrspace(1)* %out
1118 store volatile float %use1, float addrspace(1)* %out
1122 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1123 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1124 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1125 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1127 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1128 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1130 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1131 ; GCN-NSZ-NOT: [[FMA]]
1132 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1133 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1134 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1135 %tid.ext = sext i32 %tid to i64
1136 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1137 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1138 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1139 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1140 %a = load volatile float, float addrspace(1)* %a.gep
1141 %b = load volatile float, float addrspace(1)* %b.gep
1142 %c = load volatile float, float addrspace(1)* %c.gep
1143 %fneg.a = fsub float -0.000000e+00, %a
1144 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1145 %fneg = fsub float -0.000000e+00, %fma
1146 store volatile float %fneg, float addrspace(1)* %out
1150 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1151 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1152 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1153 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1155 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1156 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1158 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1159 ; GCN-NSZ-NOT: [[FMA]]
1160 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1161 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1162 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1163 %tid.ext = sext i32 %tid to i64
1164 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1165 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1166 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1167 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1168 %a = load volatile float, float addrspace(1)* %a.gep
1169 %b = load volatile float, float addrspace(1)* %b.gep
1170 %c = load volatile float, float addrspace(1)* %c.gep
1171 %fneg.b = fsub float -0.000000e+00, %b
1172 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1173 %fneg = fsub float -0.000000e+00, %fma
1174 store volatile float %fneg, float addrspace(1)* %out
1178 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1179 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1180 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1181 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1183 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
1184 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1186 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1187 ; GCN-NSZ-NOT: [[FMA]]
1188 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1189 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1190 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1191 %tid.ext = sext i32 %tid to i64
1192 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1193 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1194 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1195 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1196 %a = load volatile float, float addrspace(1)* %a.gep
1197 %b = load volatile float, float addrspace(1)* %b.gep
1198 %c = load volatile float, float addrspace(1)* %c.gep
1199 %fneg.a = fsub float -0.000000e+00, %a
1200 %fneg.b = fsub float -0.000000e+00, %b
1201 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1202 %fneg = fsub float -0.000000e+00, %fma
1203 store volatile float %fneg, float addrspace(1)* %out
1207 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1208 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1209 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1210 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1212 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1213 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1215 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1216 ; GCN-NSZ-NOT: [[FMA]]
1217 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1218 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1219 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1220 %tid.ext = sext i32 %tid to i64
1221 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1222 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1223 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1224 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1225 %a = load volatile float, float addrspace(1)* %a.gep
1226 %b = load volatile float, float addrspace(1)* %b.gep
1227 %c = load volatile float, float addrspace(1)* %c.gep
1228 %fneg.a = fsub float -0.000000e+00, %a
1229 %fneg.c = fsub float -0.000000e+00, %c
1230 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1231 %fneg = fsub float -0.000000e+00, %fma
1232 store volatile float %fneg, float addrspace(1)* %out
1236 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1237 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1238 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1239 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1241 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1242 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1244 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1245 ; GCN-NSZ-NOT: [[FMA]]
1246 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1247 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1248 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1249 %tid.ext = sext i32 %tid to i64
1250 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1251 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1252 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1253 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1254 %a = load volatile float, float addrspace(1)* %a.gep
1255 %b = load volatile float, float addrspace(1)* %b.gep
1256 %c = load volatile float, float addrspace(1)* %c.gep
1257 %fneg.c = fsub float -0.000000e+00, %c
1258 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1259 %fneg = fsub float -0.000000e+00, %fma
1260 store volatile float %fneg, float addrspace(1)* %out
1264 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1265 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1266 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1267 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1269 ; GCN-SAFE: v_xor_b32
1270 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1271 ; GCN-SAFE: v_xor_b32
1273 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1274 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1276 ; GCN-NSZ-NOT: [[FMA]]
1277 ; GCN-NSZ-NOT: [[NEG_A]]
1278 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1279 ; GCN-NSZ-NOT: [[NEG_A]]
1280 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1281 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1282 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1283 %tid.ext = sext i32 %tid to i64
1284 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1285 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1286 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1287 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1288 %a = load volatile float, float addrspace(1)* %a.gep
1289 %b = load volatile float, float addrspace(1)* %b.gep
1290 %c = load volatile float, float addrspace(1)* %c.gep
1291 %fneg.a = fsub float -0.000000e+00, %a
1292 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1293 %fneg = fsub float -0.000000e+00, %fma
1294 store volatile float %fneg, float addrspace(1)* %out
1295 store volatile float %fneg.a, float addrspace(1)* %out
; Like the store-use case above, but the extra use of fneg(a) is itself
; foldable (an fmul), so -a never needs to be materialized: the fmul takes
; the source modifier directly.
1299 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1300 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1301 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1302 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1304 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1305 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
1306 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1308 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1309 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1310 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1311 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
1312 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1313 %tid.ext = sext i32 %tid to i64
1314 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1315 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1316 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1317 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1318 %a = load volatile float, float addrspace(1)* %a.gep
1319 %b = load volatile float, float addrspace(1)* %b.gep
1320 %c = load volatile float, float addrspace(1)* %c.gep
; Both uses of %fneg.a (fma and fmul) can absorb the negation as a modifier.
1321 %fneg.a = fsub float -0.000000e+00, %a
1322 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1323 %fneg = fsub float -0.000000e+00, %fma
1324 %use1 = fmul float %fneg.a, %d
1325 store volatile float %fneg, float addrspace(1)* %out
1326 store volatile float %use1, float addrspace(1)* %out
1330 ; --------------------------------------------------------------------------------
1331 ; fmuladd tests
1332 ; --------------------------------------------------------------------------------
; fneg(fmuladd(a, b, c)): safe mode computes the mac then negates with an
; xor; nsz folds the fneg into the mad as v_mad_f32 a, -b, -c.
1334 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1335 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1336 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1337 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1339 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1340 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1342 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1343 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1344 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1345 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1346 %tid.ext = sext i32 %tid to i64
1347 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1348 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1349 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1350 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1351 %a = load volatile float, float addrspace(1)* %a.gep
1352 %b = load volatile float, float addrspace(1)* %b.gep
1353 %c = load volatile float, float addrspace(1)* %c.gep
1354 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1355 %fneg = fsub float -0.000000e+00, %fma
1356 store float %fneg, float addrspace(1)* %out.gep
; fneg(fmuladd) where the un-negated fmuladd result is also multiplied by
; 4.0: under nsz the mad is emitted negated and the second use compensates
; by multiplying by -4.0 instead.
1360 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1361 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1362 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1363 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1365 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1366 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1367 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1369 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]]
1370 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1372 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1373 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1374 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1375 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1376 %tid.ext = sext i32 %tid to i64
1377 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1378 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1379 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1380 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1381 %a = load volatile float, float addrspace(1)* %a.gep
1382 %b = load volatile float, float addrspace(1)* %b.gep
1383 %c = load volatile float, float addrspace(1)* %c.gep
; %fma has two users: the fneg below and an fmul by 4.0.
1384 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1385 %fneg = fsub float -0.000000e+00, %fma
1386 %use1 = fmul float %fma, 4.0
1387 store volatile float %fneg, float addrspace(1)* %out
1388 store volatile float %use1, float addrspace(1)* %out
1392 ; --------------------------------------------------------------------------------
1393 ; fp_extend tests
1394 ; --------------------------------------------------------------------------------
; fneg(fpext f32 -> f64) folds into the convert's source modifier:
; v_cvt_f64_f32_e64 with -[[A]]. Exact operation, so no nsz split.
1396 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1397 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1398 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1399 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1400 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1401 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1402 %tid.ext = sext i32 %tid to i64
1403 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1404 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1405 %a = load volatile float, float addrspace(1)* %a.gep
1406 %fpext = fpext float %a to double
1407 %fneg = fsub double -0.000000e+00, %fpext
1408 store double %fneg, double addrspace(1)* %out.gep
; fneg(fpext(fneg(a))): the two negations cancel, leaving a plain
; v_cvt_f64_f32 with no source modifier.
1412 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1413 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1414 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1415 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1416 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1417 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1418 %tid.ext = sext i32 %tid to i64
1419 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1420 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1421 %a = load volatile float, float addrspace(1)* %a.gep
1422 %fneg.a = fsub float -0.000000e+00, %a
1423 %fpext = fpext float %fneg.a to double
1424 %fneg = fsub double -0.000000e+00, %fpext
1425 store double %fneg, double addrspace(1)* %out.gep
; Double-negated fpext where the inner fneg(a) is also stored: the convert
; uses plain [[A]] (negations cancelled) while an xor materializes -a for
; the f32 store.
1429 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1430 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1431 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1432 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1433 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1434 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
1435 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1436 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1437 %tid.ext = sext i32 %tid to i64
1438 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1439 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1440 %a = load volatile float, float addrspace(1)* %a.gep
1441 %fneg.a = fsub float -0.000000e+00, %a
1442 %fpext = fpext float %fneg.a to double
1443 %fneg = fsub double -0.000000e+00, %fpext
1444 store volatile double %fneg, double addrspace(1)* %out.gep
1445 store volatile float %fneg.a, float addrspace(1)* undef
; fneg(fpext(a)) where the un-negated fpext is also stored: one convert is
; emitted, and the negated copy is formed by xoring the sign bit of the
; high dword of the f64 result.
1449 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1450 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1451 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1452 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1453 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1454 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1455 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1456 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1457 %tid.ext = sext i32 %tid to i64
1458 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1459 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1460 %a = load volatile float, float addrspace(1)* %a.gep
1461 %fpext = fpext float %a to double
1462 %fneg = fsub double -0.000000e+00, %fpext
1463 store volatile double %fneg, double addrspace(1)* %out.gep
1464 store volatile double %fpext, double addrspace(1)* undef
; As above, but the second use of the fpext is a foldable f64 multiply by
; 4.0; the negated result still comes from an xor of the high dword.
1468 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1469 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1470 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1471 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1472 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1473 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1474 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1475 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1476 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1477 %tid.ext = sext i32 %tid to i64
1478 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1479 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1480 %a = load volatile float, float addrspace(1)* %a.gep
1481 %fpext = fpext float %a to double
1482 %fneg = fsub double -0.000000e+00, %fpext
1483 %mul = fmul double %fpext, 4.0
1484 store volatile double %fneg, double addrspace(1)* %out.gep
1485 store volatile double %mul, double addrspace(1)* %out.gep
; f16 -> f32 variant with a multi-use fpext. Only the label is checked;
; per the FIXME, source modifiers are not yet folded for f16->f32 extends.
1489 ; FIXME: Source modifiers not folded for f16->f32
1490 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1491 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1492 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1493 %tid.ext = sext i32 %tid to i64
1494 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1495 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1496 %a = load volatile half, half addrspace(1)* %a.gep
1497 %fpext = fpext half %a to float
1498 %fneg = fsub float -0.000000e+00, %fpext
1499 store volatile float %fneg, float addrspace(1)* %out.gep
1500 store volatile float %fpext, float addrspace(1)* %out.gep
; f16 -> f32 extend with a foldable second use (fmul by 4.0); only the
; label is checked (see the FIXME on the preceding f16 test).
1504 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1505 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1506 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1507 %tid.ext = sext i32 %tid to i64
1508 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1509 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1510 %a = load volatile half, half addrspace(1)* %a.gep
1511 %fpext = fpext half %a to float
1512 %fneg = fsub float -0.000000e+00, %fpext
1513 %mul = fmul float %fpext, 4.0
1514 store volatile float %fneg, float addrspace(1)* %out.gep
1515 store volatile float %mul, float addrspace(1)* %out.gep
1519 ; --------------------------------------------------------------------------------
1520 ; fp_round tests
1521 ; --------------------------------------------------------------------------------
; fneg(fptrunc f64 -> f32) folds into the convert's source modifier:
; v_cvt_f32_f64_e64 with -[[A]].
1523 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1524 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1525 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1526 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1527 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1528 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1529 %tid.ext = sext i32 %tid to i64
1530 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1531 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1532 %a = load volatile double, double addrspace(1)* %a.gep
1533 %fpround = fptrunc double %a to float
1534 %fneg = fsub float -0.000000e+00, %fpround
1535 store float %fneg, float addrspace(1)* %out.gep
; fneg(fptrunc(fneg(a))): the negations cancel, leaving a plain
; v_cvt_f32_f64 with no source modifier.
1539 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1540 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1541 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1542 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1543 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1544 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1545 %tid.ext = sext i32 %tid to i64
1546 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1547 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1548 %a = load volatile double, double addrspace(1)* %a.gep
1549 %fneg.a = fsub double -0.000000e+00, %a
1550 %fpround = fptrunc double %fneg.a to float
1551 %fneg = fsub float -0.000000e+00, %fpround
1552 store float %fneg, float addrspace(1)* %out.gep
; Double-negated fptrunc where fneg(a) (f64) is also stored: the convert
; uses the original value, and -a is rebuilt by xoring the sign bit of the
; high half only for the f64 store.
1556 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1557 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1558 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1559 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1560 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1561 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1562 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1563 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1564 %tid.ext = sext i32 %tid to i64
1565 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1566 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1567 %a = load volatile double, double addrspace(1)* %a.gep
1568 %fneg.a = fsub double -0.000000e+00, %a
1569 %fpround = fptrunc double %fneg.a to float
1570 %fneg = fsub float -0.000000e+00, %fpround
1571 store volatile float %fneg, float addrspace(1)* %out.gep
1572 store volatile double %fneg.a, double addrspace(1)* undef
; Double-negated fptrunc where the second use of fneg(a) is a foldable f64
; multiply: the convert is unmodified and the multiply takes -[[A]] as a
; source modifier, so no explicit negation instruction is needed.
1576 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1577 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1578 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1579 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1581 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1582 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1583 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1584 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1585 %tid.ext = sext i32 %tid to i64
1586 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1587 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1588 %a = load volatile double, double addrspace(1)* %a.gep
1589 %fneg.a = fsub double -0.000000e+00, %a
1590 %fpround = fptrunc double %fneg.a to float
1591 %fneg = fsub float -0.000000e+00, %fpround
1592 %use1 = fmul double %fneg.a, %c
1593 store volatile float %fneg, float addrspace(1)* %out.gep
1594 store volatile double %use1, double addrspace(1)* undef
; fneg(fptrunc f32 -> f16) folds into the convert's source modifier:
; v_cvt_f16_f32_e64 with -[[A]].
1598 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1599 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1600 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1601 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1602 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1603 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1604 %tid.ext = sext i32 %tid to i64
1605 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1606 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1607 %a = load volatile float, float addrspace(1)* %a.gep
1608 %fpround = fptrunc float %a to half
1609 %fneg = fsub half -0.000000e+00, %fpround
1610 store half %fneg, half addrspace(1)* %out.gep
; fneg(fptrunc(fneg(a)) f32 -> f16): the negations cancel, leaving a plain
; v_cvt_f16_f32 with no source modifier.
1614 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1615 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1616 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1617 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1618 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1619 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1620 %tid.ext = sext i32 %tid to i64
1621 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1622 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1623 %a = load volatile float, float addrspace(1)* %a.gep
1624 %fneg.a = fsub float -0.000000e+00, %a
1625 %fpround = fptrunc float %fneg.a to half
1626 %fneg = fsub half -0.000000e+00, %fpround
1627 store half %fneg, half addrspace(1)* %out.gep
; fneg(fptrunc(a)) where the un-negated fptrunc result is also stored:
; a single convert is emitted and the negated copy comes from an xor of the
; f32 sign bit.
1631 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1632 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1633 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1634 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1635 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1636 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
1637 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1638 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1639 %tid.ext = sext i32 %tid to i64
1640 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1641 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1642 %a = load volatile double, double addrspace(1)* %a.gep
1643 %fpround = fptrunc double %a to float
1644 %fneg = fsub float -0.000000e+00, %fpround
1645 store volatile float %fneg, float addrspace(1)* %out.gep
1646 store volatile float %fpround, float addrspace(1)* %out.gep
; Double-negated f32->f16 round where fneg(a) is also stored as f32: the
; convert uses plain [[A]] and an xor materializes -a only for the store.
1650 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1651 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1652 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1653 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1654 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1655 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1656 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1657 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1658 %tid.ext = sext i32 %tid to i64
1659 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1660 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1661 %a = load volatile float, float addrspace(1)* %a.gep
1662 %fneg.a = fsub float -0.000000e+00, %a
1663 %fpround = fptrunc float %fneg.a to half
1664 %fneg = fsub half -0.000000e+00, %fpround
1665 store volatile half %fneg, half addrspace(1)* %out.gep
1666 store volatile float %fneg.a, float addrspace(1)* undef
; Double-negated f32->f16 round where the second use of fneg(a) is a
; foldable fmul: the multiply absorbs the negation as -[[A]], so no
; explicit xor is needed.
1670 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1671 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1672 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1673 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1674 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1675 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1676 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1677 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1678 %tid.ext = sext i32 %tid to i64
1679 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1680 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1681 %a = load volatile float, float addrspace(1)* %a.gep
1682 %fneg.a = fsub float -0.000000e+00, %a
1683 %fpround = fptrunc float %fneg.a to half
1684 %fneg = fsub half -0.000000e+00, %fpround
1685 %use1 = fmul float %fneg.a, %c
1686 store volatile half %fneg, half addrspace(1)* %out.gep
1687 store volatile float %use1, float addrspace(1)* undef
1691 ; --------------------------------------------------------------------------------
1692 ; rcp tests
1693 ; --------------------------------------------------------------------------------
; fneg(rcp(a)) folds the negation into the rcp source modifier:
; v_rcp_f32_e64 with -[[A]].
1695 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1697 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1698 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1699 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1700 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1701 %tid.ext = sext i32 %tid to i64
1702 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1703 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1704 %a = load volatile float, float addrspace(1)* %a.gep
1705 %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1706 %fneg = fsub float -0.000000e+00, %rcp
1707 store float %fneg, float addrspace(1)* %out.gep
; fneg(rcp(fneg(a))): the two negations cancel, leaving a plain
; v_rcp_f32 with no source modifier.
1711 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1712 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1713 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1714 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1715 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1716 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1717 %tid.ext = sext i32 %tid to i64
1718 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1719 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1720 %a = load volatile float, float addrspace(1)* %a.gep
1721 %fneg.a = fsub float -0.000000e+00, %a
1722 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1723 %fneg = fsub float -0.000000e+00, %rcp
1724 store float %fneg, float addrspace(1)* %out.gep
; Double-negated rcp where fneg(a) is also stored: the rcp consumes the
; original value and an xor materializes -a only for the store use.
1728 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1729 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1730 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1731 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1732 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1733 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1734 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1735 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1736 %tid.ext = sext i32 %tid to i64
1737 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1738 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1739 %a = load volatile float, float addrspace(1)* %a.gep
1740 %fneg.a = fsub float -0.000000e+00, %a
1741 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1742 %fneg = fsub float -0.000000e+00, %rcp
1743 store volatile float %fneg, float addrspace(1)* %out.gep
1744 store volatile float %fneg.a, float addrspace(1)* undef
; Double-negated rcp where the second use of fneg(a) is a foldable fmul:
; the multiply takes -[[A]] as a source modifier, so no explicit xor.
1748 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1749 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1750 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1751 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1752 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1753 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1754 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1755 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1756 %tid.ext = sext i32 %tid to i64
1757 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1758 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1759 %a = load volatile float, float addrspace(1)* %a.gep
1760 %fneg.a = fsub float -0.000000e+00, %a
1761 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1762 %fneg = fsub float -0.000000e+00, %rcp
1763 %use1 = fmul float %fneg.a, %c
1764 store volatile float %fneg, float addrspace(1)* %out.gep
1765 store volatile float %use1, float addrspace(1)* undef
1769 ; --------------------------------------------------------------------------------
1770 ; fmul_legacy tests
1771 ; --------------------------------------------------------------------------------
; fneg(mul_legacy(a, b)) folds the negation into one multiplicand:
; v_mul_legacy_f32_e64 a, -b. No SAFE/NSZ split is needed here.
1773 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1774 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1775 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1776 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1777 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1778 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1779 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1780 %tid.ext = sext i32 %tid to i64
1781 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1782 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1783 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1784 %a = load volatile float, float addrspace(1)* %a.gep
1785 %b = load volatile float, float addrspace(1)* %b.gep
1786 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1787 %fneg = fsub float -0.000000e+00, %mul
1788 store float %fneg, float addrspace(1)* %out.gep
; fneg(mul_legacy) where the un-negated product is also stored: one
; multiply is emitted and the negated copy is produced by a sign-bit xor.
1792 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1793 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1794 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1795 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1796 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1797 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1798 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1799 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1800 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1801 %tid.ext = sext i32 %tid to i64
1802 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1803 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1804 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1805 %a = load volatile float, float addrspace(1)* %a.gep
1806 %b = load volatile float, float addrspace(1)* %b.gep
1807 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1808 %fneg = fsub float -0.000000e+00, %mul
1809 store volatile float %fneg, float addrspace(1)* %out
1810 store volatile float %mul, float addrspace(1)* %out
; fneg(mul_legacy) where the product also feeds a second mul_legacy by 4.0:
; the first multiply is emitted negated (a * -b) and the second compensates
; with a negated operand (-result * 4.0).
1814 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1815 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1816 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1817 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1818 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1819 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1820 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1821 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1822 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1823 %tid.ext = sext i32 %tid to i64
1824 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1825 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1826 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1827 %a = load volatile float, float addrspace(1)* %a.gep
1828 %b = load volatile float, float addrspace(1)* %b.gep
1829 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1830 %fneg = fsub float -0.000000e+00, %mul
1831 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1832 store volatile float %fneg, float addrspace(1)* %out
1833 store volatile float %use1, float addrspace(1)* %out
; fneg(mul_legacy(fneg(a), b)): the two negations cancel, leaving a plain
; v_mul_legacy_f32 with no modifiers.
1837 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1838 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1839 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1840 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1841 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1842 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1843 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1844 %tid.ext = sext i32 %tid to i64
1845 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1846 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1847 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1848 %a = load volatile float, float addrspace(1)* %a.gep
1849 %b = load volatile float, float addrspace(1)* %b.gep
1850 %fneg.a = fsub float -0.000000e+00, %a
1851 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1852 %fneg = fsub float -0.000000e+00, %mul
1853 store volatile float %fneg, float addrspace(1)* %out
; fneg(mul_legacy(a, fneg(b))): mirror of the previous test with the inner
; negation on the second operand; again the negations cancel.
1857 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1858 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1859 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1860 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1861 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1862 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1863 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1864 %tid.ext = sext i32 %tid to i64
1865 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1866 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1867 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1868 %a = load volatile float, float addrspace(1)* %a.gep
1869 %b = load volatile float, float addrspace(1)* %b.gep
1870 %fneg.b = fsub float -0.000000e+00, %b
1871 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1872 %fneg = fsub float -0.000000e+00, %mul
1873 store volatile float %fneg, float addrspace(1)* %out
; fneg(mul_legacy(fneg(a), fneg(b))): three negations reduce to one,
; emitted as a single source modifier (a * -b).
1877 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1878 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1879 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1880 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1881 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1882 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1883 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1884 %tid.ext = sext i32 %tid to i64
1885 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1886 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1887 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1888 %a = load volatile float, float addrspace(1)* %a.gep
1889 %b = load volatile float, float addrspace(1)* %b.gep
1890 %fneg.a = fsub float -0.000000e+00, %a
1891 %fneg.b = fsub float -0.000000e+00, %b
1892 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1893 %fneg = fsub float -0.000000e+00, %mul
1894 store volatile float %fneg, float addrspace(1)* %out
1898 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1899 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1900 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1901 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1902 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1903 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1904 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; %fneg.a is also stored, so it must be materialized (sign-bit xor); inside
; the multiply its negation cancels with the outer fneg, leaving a plain mul.
1905 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1906 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1907 %tid.ext = sext i32 %tid to i64
1908 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1909 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1910 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1911 %a = load volatile float, float addrspace(1)* %a.gep
1912 %b = load volatile float, float addrspace(1)* %b.gep
1913 %fneg.a = fsub float -0.000000e+00, %a
1914 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1915 %fneg = fsub float -0.000000e+00, %mul
1916 store volatile float %fneg, float addrspace(1)* %out
1917 store volatile float %fneg.a, float addrspace(1)* %out
1921 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1922 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1923 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1924 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1925 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1926 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1927 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; %fneg.a has two fmul.legacy users: in the first the negation cancels with
; the outer fneg (plain VOP2 mul); the second folds it as a -src modifier
; against the scalar kernel argument %c.
1928 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1929 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1930 %tid.ext = sext i32 %tid to i64
1931 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1932 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1933 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1934 %a = load volatile float, float addrspace(1)* %a.gep
1935 %b = load volatile float, float addrspace(1)* %b.gep
1936 %fneg.a = fsub float -0.000000e+00, %a
1937 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1938 %fneg = fsub float -0.000000e+00, %mul
1939 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
1940 store volatile float %fneg, float addrspace(1)* %out
1941 store volatile float %use1, float addrspace(1)* %out
1945 ; --------------------------------------------------------------------------------
; sin tests
1947 ; --------------------------------------------------------------------------------
1949 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
1950 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1951 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
1952 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1953 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1954 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(llvm.sin(a)): the checks expect the negation absorbed into the
; pre-scale multiply constant of the sin lowering (0xbe22f983 — presumably
; -1/(2*pi); confirm against the lowering) rather than a separate xor.
1955 define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1956 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1957 %tid.ext = sext i32 %tid to i64
1958 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1959 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1960 %a = load volatile float, float addrspace(1)* %a.gep
1961 %sin = call float @llvm.sin.f32(float %a)
1962 %fneg = fsub float -0.000000e+00, %sin
1963 store float %fneg, float addrspace(1)* %out.gep
1967 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1968 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1969 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1970 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(llvm.amdgcn.sin(a)): the fneg is folded backwards into the operand of
; v_sin as a VOP3 source modifier (-[[A]]).
1971 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1972 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1973 %tid.ext = sext i32 %tid to i64
1974 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1975 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1976 %a = load volatile float, float addrspace(1)* %a.gep
1977 %sin = call float @llvm.amdgcn.sin.f32(float %a)
1978 %fneg = fsub float -0.0, %sin
1979 store float %fneg, float addrspace(1)* %out.gep
1983 ; --------------------------------------------------------------------------------
; ftrunc tests
1985 ; --------------------------------------------------------------------------------
1987 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
1988 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1989 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1990 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(trunc(a)) == trunc(-a): the fneg folds into v_trunc's source modifier.
1991 define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1992 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1993 %tid.ext = sext i32 %tid to i64
1994 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1995 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1996 %a = load volatile float, float addrspace(1)* %a.gep
1997 %trunc = call float @llvm.trunc.f32(float %a)
1998 %fneg = fsub float -0.0, %trunc
1999 store float %fneg, float addrspace(1)* %out.gep
2003 ; --------------------------------------------------------------------------------
; fround tests
2005 ; --------------------------------------------------------------------------------
2007 ; GCN-LABEL: {{^}}v_fneg_round_f32:
2008 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2009 ; GCN: v_trunc_f32_e32
2010 ; GCN: v_sub_f32_e32
2011 ; GCN: v_cndmask_b32
2013 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
2014 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
2016 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
2017 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(round(a)): round lowers to trunc/sub/cndmask plus a final add. With
; signed zeros preserved (SAFE) the fneg stays a sign-bit xor after the add;
; with -enable-no-signed-zeros-fp-math (NSZ) fneg(x+y) folds to (-x)-y.
2018 define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2019 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2020 %tid.ext = sext i32 %tid to i64
2021 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2022 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2023 %a = load volatile float, float addrspace(1)* %a.gep
2024 %round = call float @llvm.round.f32(float %a)
2025 %fneg = fsub float -0.0, %round
2026 store float %fneg, float addrspace(1)* %out.gep
2030 ; --------------------------------------------------------------------------------
; rint tests
2032 ; --------------------------------------------------------------------------------
2034 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
2035 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2036 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2037 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(rint(a)): rint lowers to v_rndne and the fneg folds into its operand.
2038 define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2039 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2040 %tid.ext = sext i32 %tid to i64
2041 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2042 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2043 %a = load volatile float, float addrspace(1)* %a.gep
2044 %rint = call float @llvm.rint.f32(float %a)
2045 %fneg = fsub float -0.0, %rint
2046 store float %fneg, float addrspace(1)* %out.gep
2050 ; --------------------------------------------------------------------------------
; nearbyint tests
2052 ; --------------------------------------------------------------------------------
2054 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
2055 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2056 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2057 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(nearbyint(a)): same lowering as rint (v_rndne), fneg folded into the
; source modifier.
2058 define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2059 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2060 %tid.ext = sext i32 %tid to i64
2061 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2062 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2063 %a = load volatile float, float addrspace(1)* %a.gep
2064 %nearbyint = call float @llvm.nearbyint.f32(float %a)
2065 %fneg = fsub float -0.0, %nearbyint
2066 store float %fneg, float addrspace(1)* %out.gep
2070 ; --------------------------------------------------------------------------------
2071 ; fcanonicalize tests
2072 ; --------------------------------------------------------------------------------
2074 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
2075 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2076 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
2077 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(canonicalize(a)): canonicalize lowers to a multiply by 1.0; the fneg
; is absorbed by flipping the constant to -1.0, so one instruction does both.
2078 define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2079 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2080 %tid.ext = sext i32 %tid to i64
2081 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2082 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2083 %a = load volatile float, float addrspace(1)* %a.gep
2084 %trunc = call float @llvm.canonicalize.f32(float %a)
2085 %fneg = fsub float -0.0, %trunc
2086 store float %fneg, float addrspace(1)* %out.gep
2090 ; --------------------------------------------------------------------------------
; interp tests
2092 ; --------------------------------------------------------------------------------
2094 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
2095 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2096 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2097 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2098 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2099 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; fneg fed into v_interp_p1 (which takes no source modifiers): the negation
; is instead folded backwards into the producing multiply (-[[B]]), shared by
; both interp uses.
2100 define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2101 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2102 %tid.ext = sext i32 %tid to i64
2103 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2104 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2105 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2106 %a = load volatile float, float addrspace(1)* %a.gep
2107 %b = load volatile float, float addrspace(1)* %b.gep
2108 %mul = fmul float %a, %b
2109 %fneg = fsub float -0.0, %mul
2110 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2111 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2112 store volatile float %intrp0, float addrspace(1)* %out.gep
2113 store volatile float %intrp1, float addrspace(1)* %out.gep
2117 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
2118 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2119 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2120 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2121 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2122 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; Same as the p1 case, but for the second interpolation stage: the fneg is
; folded into the feeding multiply because v_interp_p2 cannot take modifiers.
2123 define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2124 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2125 %tid.ext = sext i32 %tid to i64
2126 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2127 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2128 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2129 %a = load volatile float, float addrspace(1)* %a.gep
2130 %b = load volatile float, float addrspace(1)* %b.gep
2131 %mul = fmul float %a, %b
2132 %fneg = fsub float -0.0, %mul
2133 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2134 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2135 store volatile float %intrp0, float addrspace(1)* %out.gep
2136 store volatile float %intrp1, float addrspace(1)* %out.gep
2140 ; --------------------------------------------------------------------------------
; CopyToReg tests
2142 ; --------------------------------------------------------------------------------
2144 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2145 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2146 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2147 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2148 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2149 ; GCN: s_cbranch_scc0
2151 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2154 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2155 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2156 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; The fneg value crosses a basic-block boundary (CopyToReg), so it cannot be
; folded into the multiply; a sign-bit xor is emitted in the branch taken.
; NOTE(review): the `if:`/`endif:` label lines referenced by the br below are
; elided from this extract — confirm against the full file.
2158 define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2159 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2160 %tid.ext = sext i32 %tid to i64
2161 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2162 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2163 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2164 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2165 %a = load volatile float, float addrspace(1)* %a.gep
2166 %b = load volatile float, float addrspace(1)* %b.gep
2167 %c = load volatile float, float addrspace(1)* %c.gep
2168 %mul = fmul float %a, %b
2169 %fneg = fsub float -0.0, %mul
2170 %cmp0 = icmp eq i32 %d, 0
2171 br i1 %cmp0, label %if, label %endif
2174 %mul1 = fmul float %fneg, %c
2175 store volatile float %mul1, float addrspace(1)* %out.gep
2179 store volatile float %mul, float addrspace(1)* %out.gep
2183 ; --------------------------------------------------------------------------------
; inline asm tests
2185 ; --------------------------------------------------------------------------------
2187 ; Can't fold into use, so should fold into source
2188 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2191 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2192 ; GCN: ; use [[MUL]]
2193 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; The only users of the fneg are an inline-asm "use" and a store, neither of
; which accepts source modifiers, so the fneg folds into the producing mul.
2194 define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2195 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2196 %tid.ext = sext i32 %tid to i64
2197 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2198 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2199 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2200 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2201 %a = load volatile float, float addrspace(1)* %a.gep
2202 %b = load volatile float, float addrspace(1)* %b.gep
2203 %c = load volatile float, float addrspace(1)* %c.gep
2204 %mul = fmul float %a, %b
2205 %fneg = fsub float -0.0, %mul
2206 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2207 store volatile float %fneg, float addrspace(1)* %out.gep
2211 ; --------------------------------------------------------------------------------
2213 ; --------------------------------------------------------------------------------
2215 ; Can't fold into use, so should fold into source
2216 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2217 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2218 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2219 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2220 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2221 ; GCN: ; use [[NEG]]
2222 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; Unlike the previous test, the un-negated %mul is also stored, so the fneg
; cannot fold into the multiply and is materialized as a sign-bit xor for
; the inline-asm use.
2223 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2224 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2225 %tid.ext = sext i32 %tid to i64
2226 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2227 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2228 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2229 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2230 %a = load volatile float, float addrspace(1)* %a.gep
2231 %b = load volatile float, float addrspace(1)* %b.gep
2232 %c = load volatile float, float addrspace(1)* %c.gep
2233 %mul = fmul float %a, %b
2234 %fneg = fsub float -0.0, %mul
2235 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2236 store volatile float %mul, float addrspace(1)* %out.gep
2240 ; --------------------------------------------------------------------------------
2241 ; code size regression tests
2242 ; --------------------------------------------------------------------------------
2244 ; There are multiple users of the fneg that must use a VOP3
2245 ; instruction, so there is no penalty
2246 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2247 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2248 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2249 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2251 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2252 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2254 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2255 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
; Both users of %fneg.a are fmas (already VOP3), so folding the negation as
; a source modifier into each is free; no standalone fneg is emitted.
2256 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2257 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2258 %tid.ext = sext i32 %tid to i64
2259 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2260 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2261 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2262 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2263 %a = load volatile float, float addrspace(1)* %a.gep
2264 %b = load volatile float, float addrspace(1)* %b.gep
2265 %c = load volatile float, float addrspace(1)* %c.gep
2267 %fneg.a = fsub float -0.0, %a
2268 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2269 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2271 store volatile float %fma0, float addrspace(1)* %out
2272 store volatile float %fma1, float addrspace(1)* %out
2276 ; There are multiple users, but both require using a larger encoding
2279 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2280 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2281 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2282 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2284 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2285 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2286 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2287 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; Both users are VOP2 muls; folding the fneg forces each into the larger
; VOP3 (_e64) encoding, which the fold still prefers over a separate xor.
2288 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2289 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2290 %tid.ext = sext i32 %tid to i64
2291 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2292 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2293 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2294 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2295 %a = load volatile float, float addrspace(1)* %a.gep
2296 %b = load volatile float, float addrspace(1)* %b.gep
2297 %c = load volatile float, float addrspace(1)* %c.gep
2299 %fneg.a = fsub float -0.0, %a
2300 %mul0 = fmul float %fneg.a, %b
2301 %mul1 = fmul float %fneg.a, %c
2303 store volatile float %mul0, float addrspace(1)* %out
2304 store volatile float %mul1, float addrspace(1)* %out
2308 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2309 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2310 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2311 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2312 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2314 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2315 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2317 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2318 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; Mixed users: the fma absorbs the negation for free; the mul pays the VOP3
; encoding cost. The fold is still applied to both.
2319 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2320 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2321 %tid.ext = sext i32 %tid to i64
2322 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2323 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2324 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2325 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2326 %a = load volatile float, float addrspace(1)* %a.gep
2327 %b = load volatile float, float addrspace(1)* %b.gep
2328 %c = load volatile float, float addrspace(1)* %c.gep
2330 %fneg.a = fsub float -0.0, %a
2331 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2332 %mul1 = fmul float %fneg.a, %c
2334 store volatile float %fma0, float addrspace(1)* %out
2335 store volatile float %mul1, float addrspace(1)* %out
2339 ; The use of the fneg requires a code size increase, but folding into
2340 ; the source does not
2342 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2343 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2344 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2345 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2346 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2348 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2349 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2350 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2352 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2353 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2354 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2356 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2357 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
; fneg(fma(a,b,2.0)) with two mul users. SAFE: the negation stays on each
; mul operand (two VOP3 muls). NSZ: fneg(fma(a,b,c)) = fma(a,-b,-c) folds
; into the fma's operands, leaving both muls in the short VOP2 form.
2358 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2359 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2360 %tid.ext = sext i32 %tid to i64
2361 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2362 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2363 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2364 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2365 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2366 %a = load volatile float, float addrspace(1)* %a.gep
2367 %b = load volatile float, float addrspace(1)* %b.gep
2368 %c = load volatile float, float addrspace(1)* %c.gep
2369 %d = load volatile float, float addrspace(1)* %d.gep
2371 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2372 %fneg.fma0 = fsub float -0.0, %fma0
2373 %mul1 = fmul float %fneg.fma0, %c
2374 %mul2 = fmul float %fneg.fma0, %d
2376 store volatile float %mul1, float addrspace(1)* %out
2377 store volatile float %mul2, float addrspace(1)* %out
2381 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2382 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2383 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2384 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2385 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2387 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2388 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2389 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2391 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2392 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; f64 variant of the previous test: v_mul_f64 is always VOP3, so folding the
; fneg into both mul operands is free; the same checks apply to SAFE and NSZ.
2393 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
2394 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2395 %tid.ext = sext i32 %tid to i64
2396 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2397 %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2398 %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2399 %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2400 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2401 %a = load volatile double, double addrspace(1)* %a.gep
2402 %b = load volatile double, double addrspace(1)* %b.gep
2403 %c = load volatile double, double addrspace(1)* %c.gep
2404 %d = load volatile double, double addrspace(1)* %d.gep
2406 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2407 %fneg.fma0 = fsub double -0.0, %fma0
2408 %mul1 = fmul double %fneg.fma0, %c
2409 %mul2 = fmul double %fneg.fma0, %d
2411 store volatile double %mul1, double addrspace(1)* %out
2412 store volatile double %mul2, double addrspace(1)* %out
2416 ; %trunc.a has one fneg use, but it requires a code size increase and
2417 ; the fneg can instead be folded for free into the fma.
2419 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2420 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2421 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2422 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2423 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2424 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2425 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; Folding into v_trunc would force it from VOP2 to VOP3; the fma takes the
; modifier for free, so trunc stays _e32 and the fma gets -[[TRUNC_A]].
2426 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2427 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2428 %tid.ext = sext i32 %tid to i64
2429 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2430 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2431 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2432 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2433 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2434 %a = load volatile float, float addrspace(1)* %a.gep
2435 %b = load volatile float, float addrspace(1)* %b.gep
2436 %c = load volatile float, float addrspace(1)* %c.gep
2437 %d = load volatile float, float addrspace(1)* %d.gep
2439 %trunc.a = call float @llvm.trunc.f32(float %a)
2440 %trunc.fneg.a = fsub float -0.0, %trunc.a
2441 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2442 store volatile float %fma0, float addrspace(1)* %out
2446 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2447 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2448 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2449 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2450 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2451 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2452 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2453 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2454 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2455 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; %trunc.a also has a non-negated mul user, so the fneg cannot be pushed
; into the trunc; it folds only into the fma while the mul uses the plain
; truncated value.
2456 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2457 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2458 %tid.ext = sext i32 %tid to i64
2459 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2460 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2461 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2462 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2463 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2464 %a = load volatile float, float addrspace(1)* %a.gep
2465 %b = load volatile float, float addrspace(1)* %b.gep
2466 %c = load volatile float, float addrspace(1)* %c.gep
2467 %d = load volatile float, float addrspace(1)* %d.gep
2469 %trunc.a = call float @llvm.trunc.f32(float %a)
2470 %trunc.fneg.a = fsub float -0.0, %trunc.a
2471 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2472 %mul1 = fmul float %trunc.a, %d
2473 store volatile float %fma0, float addrspace(1)* %out
2474 store volatile float %mul1, float addrspace(1)* %out
2478 declare i32 @llvm.amdgcn.workitem.id.x() #1
2479 declare float @llvm.fma.f32(float, float, float) #1
2480 declare float @llvm.fmuladd.f32(float, float, float) #1
2481 declare float @llvm.sin.f32(float) #1
2482 declare float @llvm.trunc.f32(float) #1
2483 declare float @llvm.round.f32(float) #1
2484 declare float @llvm.rint.f32(float) #1
2485 declare float @llvm.nearbyint.f32(float) #1
2486 declare float @llvm.canonicalize.f32(float) #1
2487 declare float @llvm.minnum.f32(float, float) #1
2488 declare float @llvm.maxnum.f32(float, float) #1
2489 declare half @llvm.minnum.f16(half, half) #1
2490 declare double @llvm.minnum.f64(double, double) #1
2491 declare double @llvm.fma.f64(double, double, double) #1
2493 declare float @llvm.amdgcn.sin.f32(float) #1
2494 declare float @llvm.amdgcn.rcp.f32(float) #1
2495 declare float @llvm.amdgcn.rcp.legacy(float) #1
2496 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2497 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2498 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2500 attributes #0 = { nounwind }
2501 attributes #1 = { nounwind readnone }