1 ; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
4 ; --------------------------------------------------------------------------------
5 ; fadd tests
6 ; --------------------------------------------------------------------------------
8 ; GCN-LABEL: {{^}}v_fneg_add_f32:
9 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
10 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
12 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
13 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
15 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
16 ; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
17 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
18 %tid = call i32 @llvm.amdgcn.workitem.id.x()
19 %tid.ext = sext i32 %tid to i64
20 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
21 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
22 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
23 %a = load volatile float, float addrspace(1)* %a.gep
24 %b = load volatile float, float addrspace(1)* %b.gep
25 %add = fadd float %a, %b
26 %fneg = fsub float -0.000000e+00, %add
27 store float %fneg, float addrspace(1)* %out.gep
31 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
32 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
33 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
34 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
35 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
36 ; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
37 ; GCN-NEXT: buffer_store_dword [[ADD]]
38 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
39 %tid = call i32 @llvm.amdgcn.workitem.id.x()
40 %tid.ext = sext i32 %tid to i64
41 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
42 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
43 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
44 %a = load volatile float, float addrspace(1)* %a.gep
45 %b = load volatile float, float addrspace(1)* %b.gep
46 %add = fadd float %a, %b
47 %fneg = fsub float -0.000000e+00, %add
48 store volatile float %fneg, float addrspace(1)* %out
49 store volatile float %add, float addrspace(1)* %out
53 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
54 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
55 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
57 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
58 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
59 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
61 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
62 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
63 ; GCN: buffer_store_dword [[NEG_ADD]]
64 ; GCN-NEXT: buffer_store_dword [[MUL]]
65 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
66 %tid = call i32 @llvm.amdgcn.workitem.id.x()
67 %tid.ext = sext i32 %tid to i64
68 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
69 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
70 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
71 %a = load volatile float, float addrspace(1)* %a.gep
72 %b = load volatile float, float addrspace(1)* %b.gep
73 %add = fadd float %a, %b
74 %fneg = fsub float -0.000000e+00, %add
75 %use1 = fmul float %add, 4.0
76 store volatile float %fneg, float addrspace(1)* %out
77 store volatile float %use1, float addrspace(1)* %out
81 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
82 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
83 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
85 ; GCN-SAFE: v_sub_f32_e32
86 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000,
88 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
89 ; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
90 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
91 %tid = call i32 @llvm.amdgcn.workitem.id.x()
92 %tid.ext = sext i32 %tid to i64
93 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
94 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
95 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
96 %a = load volatile float, float addrspace(1)* %a.gep
97 %b = load volatile float, float addrspace(1)* %b.gep
98 %fneg.a = fsub float -0.000000e+00, %a
99 %add = fadd float %fneg.a, %b
100 %fneg = fsub float -0.000000e+00, %add
101 store volatile float %fneg, float addrspace(1)* %out
105 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
106 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
107 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
109 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
110 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
112 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
113 ; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
114 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
115 %tid = call i32 @llvm.amdgcn.workitem.id.x()
116 %tid.ext = sext i32 %tid to i64
117 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
118 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
119 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
120 %a = load volatile float, float addrspace(1)* %a.gep
121 %b = load volatile float, float addrspace(1)* %b.gep
122 %fneg.b = fsub float -0.000000e+00, %b
123 %add = fadd float %a, %fneg.b
124 %fneg = fsub float -0.000000e+00, %add
125 store volatile float %fneg, float addrspace(1)* %out
129 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
130 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
131 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
133 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
134 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
136 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
137 ; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
138 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
139 %tid = call i32 @llvm.amdgcn.workitem.id.x()
140 %tid.ext = sext i32 %tid to i64
141 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
142 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
143 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
144 %a = load volatile float, float addrspace(1)* %a.gep
145 %b = load volatile float, float addrspace(1)* %b.gep
146 %fneg.a = fsub float -0.000000e+00, %a
147 %fneg.b = fsub float -0.000000e+00, %b
148 %add = fadd float %fneg.a, %fneg.b
149 %fneg = fsub float -0.000000e+00, %add
150 store volatile float %fneg, float addrspace(1)* %out
154 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
155 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
156 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
158 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
159 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
160 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
161 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
163 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
164 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
165 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
166 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
167 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
168 %tid = call i32 @llvm.amdgcn.workitem.id.x()
169 %tid.ext = sext i32 %tid to i64
170 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
171 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
172 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
173 %a = load volatile float, float addrspace(1)* %a.gep
174 %b = load volatile float, float addrspace(1)* %b.gep
175 %fneg.a = fsub float -0.000000e+00, %a
176 %add = fadd float %fneg.a, %b
177 %fneg = fsub float -0.000000e+00, %add
178 store volatile float %fneg, float addrspace(1)* %out
179 store volatile float %fneg.a, float addrspace(1)* %out
183 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
184 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
185 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
187 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
188 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
189 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
191 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
192 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
193 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
194 ; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
195 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
196 %tid = call i32 @llvm.amdgcn.workitem.id.x()
197 %tid.ext = sext i32 %tid to i64
198 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
199 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
200 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
201 %a = load volatile float, float addrspace(1)* %a.gep
202 %b = load volatile float, float addrspace(1)* %b.gep
203 %fneg.a = fsub float -0.000000e+00, %a
204 %add = fadd float %fneg.a, %b
205 %fneg = fsub float -0.000000e+00, %add
206 %use1 = fmul float %fneg.a, %c
207 store volatile float %fneg, float addrspace(1)* %out
208 store volatile float %use1, float addrspace(1)* %out
212 ; --------------------------------------------------------------------------------
213 ; fmul tests
214 ; --------------------------------------------------------------------------------
216 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
217 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
218 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
219 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
220 ; GCN-NEXT: buffer_store_dword [[RESULT]]
221 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
222 %tid = call i32 @llvm.amdgcn.workitem.id.x()
223 %tid.ext = sext i32 %tid to i64
224 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
225 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
226 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
227 %a = load volatile float, float addrspace(1)* %a.gep
228 %b = load volatile float, float addrspace(1)* %b.gep
229 %mul = fmul float %a, %b
230 %fneg = fsub float -0.000000e+00, %mul
231 store float %fneg, float addrspace(1)* %out.gep
235 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
236 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
237 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
238 ; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
239 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
240 ; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
241 ; GCN: buffer_store_dword [[MUL]]
242 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
243 %tid = call i32 @llvm.amdgcn.workitem.id.x()
244 %tid.ext = sext i32 %tid to i64
245 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
246 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
247 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
248 %a = load volatile float, float addrspace(1)* %a.gep
249 %b = load volatile float, float addrspace(1)* %b.gep
250 %mul = fmul float %a, %b
251 %fneg = fsub float -0.000000e+00, %mul
252 store volatile float %fneg, float addrspace(1)* %out
253 store volatile float %mul, float addrspace(1)* %out
257 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
258 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
259 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
260 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
261 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
262 ; GCN-NEXT: buffer_store_dword [[MUL0]]
263 ; GCN-NEXT: buffer_store_dword [[MUL1]]
264 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
265 %tid = call i32 @llvm.amdgcn.workitem.id.x()
266 %tid.ext = sext i32 %tid to i64
267 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
268 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
269 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
270 %a = load volatile float, float addrspace(1)* %a.gep
271 %b = load volatile float, float addrspace(1)* %b.gep
272 %mul = fmul float %a, %b
273 %fneg = fsub float -0.000000e+00, %mul
274 %use1 = fmul float %mul, 4.0
275 store volatile float %fneg, float addrspace(1)* %out
276 store volatile float %use1, float addrspace(1)* %out
280 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
281 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
282 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
283 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
284 ; GCN-NEXT: buffer_store_dword [[MUL]]
285 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
286 %tid = call i32 @llvm.amdgcn.workitem.id.x()
287 %tid.ext = sext i32 %tid to i64
288 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
289 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
290 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
291 %a = load volatile float, float addrspace(1)* %a.gep
292 %b = load volatile float, float addrspace(1)* %b.gep
293 %fneg.a = fsub float -0.000000e+00, %a
294 %mul = fmul float %fneg.a, %b
295 %fneg = fsub float -0.000000e+00, %mul
296 store volatile float %fneg, float addrspace(1)* %out
300 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
301 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
302 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
303 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
304 ; GCN-NEXT: buffer_store_dword [[MUL]]
305 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
306 %tid = call i32 @llvm.amdgcn.workitem.id.x()
307 %tid.ext = sext i32 %tid to i64
308 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
309 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
310 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
311 %a = load volatile float, float addrspace(1)* %a.gep
312 %b = load volatile float, float addrspace(1)* %b.gep
313 %fneg.b = fsub float -0.000000e+00, %b
314 %mul = fmul float %a, %fneg.b
315 %fneg = fsub float -0.000000e+00, %mul
316 store volatile float %fneg, float addrspace(1)* %out
320 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
321 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
322 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
323 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
324 ; GCN-NEXT: buffer_store_dword [[MUL]]
325 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
326 %tid = call i32 @llvm.amdgcn.workitem.id.x()
327 %tid.ext = sext i32 %tid to i64
328 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
329 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
330 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
331 %a = load volatile float, float addrspace(1)* %a.gep
332 %b = load volatile float, float addrspace(1)* %b.gep
333 %fneg.a = fsub float -0.000000e+00, %a
334 %fneg.b = fsub float -0.000000e+00, %b
335 %mul = fmul float %fneg.a, %fneg.b
336 %fneg = fsub float -0.000000e+00, %mul
337 store volatile float %fneg, float addrspace(1)* %out
341 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
342 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
343 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
344 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
345 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
346 ; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
347 ; GCN: buffer_store_dword [[NEG_A]]
348 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
349 %tid = call i32 @llvm.amdgcn.workitem.id.x()
350 %tid.ext = sext i32 %tid to i64
351 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
352 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
353 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
354 %a = load volatile float, float addrspace(1)* %a.gep
355 %b = load volatile float, float addrspace(1)* %b.gep
356 %fneg.a = fsub float -0.000000e+00, %a
357 %mul = fmul float %fneg.a, %b
358 %fneg = fsub float -0.000000e+00, %mul
359 store volatile float %fneg, float addrspace(1)* %out
360 store volatile float %fneg.a, float addrspace(1)* %out
364 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
365 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
366 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
367 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
368 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
369 ; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
370 ; GCN: buffer_store_dword [[MUL]]
371 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
372 %tid = call i32 @llvm.amdgcn.workitem.id.x()
373 %tid.ext = sext i32 %tid to i64
374 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
375 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
376 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
377 %a = load volatile float, float addrspace(1)* %a.gep
378 %b = load volatile float, float addrspace(1)* %b.gep
379 %fneg.a = fsub float -0.000000e+00, %a
380 %mul = fmul float %fneg.a, %b
381 %fneg = fsub float -0.000000e+00, %mul
382 %use1 = fmul float %fneg.a, %c
383 store volatile float %fneg, float addrspace(1)* %out
384 store volatile float %use1, float addrspace(1)* %out
388 ; --------------------------------------------------------------------------------
389 ; fminnum tests
390 ; --------------------------------------------------------------------------------
392 ; GCN-LABEL: {{^}}v_fneg_minnum_f32:
393 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
394 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
395 ; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
396 ; GCN: buffer_store_dword [[RESULT]]
397 define amdgpu_kernel void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
398 %tid = call i32 @llvm.amdgcn.workitem.id.x()
399 %tid.ext = sext i32 %tid to i64
400 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
401 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
402 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
403 %a = load volatile float, float addrspace(1)* %a.gep
404 %b = load volatile float, float addrspace(1)* %b.gep
405 %min = call float @llvm.minnum.f32(float %a, float %b)
406 %fneg = fsub float -0.000000e+00, %min
407 store float %fneg, float addrspace(1)* %out.gep
411 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
412 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
413 ; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
414 ; GCN: buffer_store_dword [[RESULT]]
415 define amdgpu_kernel void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
416 %tid = call i32 @llvm.amdgcn.workitem.id.x()
417 %tid.ext = sext i32 %tid to i64
418 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
419 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
420 %a = load volatile float, float addrspace(1)* %a.gep
421 %min = call float @llvm.minnum.f32(float %a, float %a)
422 %min.fneg = fsub float -0.0, %min
423 store float %min.fneg, float addrspace(1)* %out.gep
427 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
428 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
429 ; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
430 ; GCN: buffer_store_dword [[RESULT]]
431 define amdgpu_kernel void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
432 %tid = call i32 @llvm.amdgcn.workitem.id.x()
433 %tid.ext = sext i32 %tid to i64
434 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
435 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
436 %a = load volatile float, float addrspace(1)* %a.gep
437 %min = call float @llvm.minnum.f32(float 4.0, float %a)
438 %fneg = fsub float -0.000000e+00, %min
439 store float %fneg, float addrspace(1)* %out.gep
443 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
444 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
445 ; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
446 ; GCN: buffer_store_dword [[RESULT]]
447 define amdgpu_kernel void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
448 %tid = call i32 @llvm.amdgcn.workitem.id.x()
449 %tid.ext = sext i32 %tid to i64
450 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
451 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
452 %a = load volatile float, float addrspace(1)* %a.gep
453 %min = call float @llvm.minnum.f32(float -4.0, float %a)
454 %fneg = fsub float -0.000000e+00, %min
455 store float %fneg, float addrspace(1)* %out.gep
459 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
460 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
461 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
462 ; GCN: buffer_store_dword [[RESULT]]
463 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
464 %tid = call i32 @llvm.amdgcn.workitem.id.x()
465 %tid.ext = sext i32 %tid to i64
466 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
467 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
468 %a = load volatile float, float addrspace(1)* %a.gep
469 %min = call float @llvm.minnum.f32(float 0.0, float %a)
470 %fneg = fsub float -0.000000e+00, %min
471 store float %fneg, float addrspace(1)* %out.gep
475 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32:
476 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
477 ; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
478 ; GCN: buffer_store_dword [[RESULT]]
479 define amdgpu_kernel void @v_fneg_neg0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
480 %tid = call i32 @llvm.amdgcn.workitem.id.x()
481 %tid.ext = sext i32 %tid to i64
482 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
483 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
484 %a = load volatile float, float addrspace(1)* %a.gep
485 %min = call float @llvm.minnum.f32(float -0.0, float %a)
486 %fneg = fsub float -0.000000e+00, %min
487 store float %fneg, float addrspace(1)* %out.gep
491 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32:
492 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
493 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
494 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
495 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
496 ; GCN: buffer_store_dword [[RESULT]]
497 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
498 %tid = call i32 @llvm.amdgcn.workitem.id.x()
499 %tid.ext = sext i32 %tid to i64
500 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
501 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
502 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
503 %a = load volatile float, float addrspace(1)* %a.gep
504 %b = load volatile float, float addrspace(1)* %b.gep
505 %min = call float @llvm.minnum.f32(float 0.0, float %a)
506 %fneg = fsub float -0.000000e+00, %min
507 %mul = fmul float %fneg, %b
508 store float %mul, float addrspace(1)* %out.gep
512 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
513 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
514 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
515 ; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
516 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
517 ; GCN-NEXT: buffer_store_dword [[MAX0]]
518 ; GCN-NEXT: buffer_store_dword [[MUL1]]
519 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
520 %tid = call i32 @llvm.amdgcn.workitem.id.x()
521 %tid.ext = sext i32 %tid to i64
522 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
523 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
524 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
525 %a = load volatile float, float addrspace(1)* %a.gep
526 %b = load volatile float, float addrspace(1)* %b.gep
527 %min = call float @llvm.minnum.f32(float %a, float %b)
528 %fneg = fsub float -0.000000e+00, %min
529 %use1 = fmul float %min, 4.0
530 store volatile float %fneg, float addrspace(1)* %out
531 store volatile float %use1, float addrspace(1)* %out
535 ; --------------------------------------------------------------------------------
536 ; fmaxnum tests
537 ; --------------------------------------------------------------------------------
539 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
540 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
541 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
542 ; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
543 ; GCN: buffer_store_dword [[RESULT]]
544 define amdgpu_kernel void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
545 %tid = call i32 @llvm.amdgcn.workitem.id.x()
546 %tid.ext = sext i32 %tid to i64
547 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
548 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
549 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
550 %a = load volatile float, float addrspace(1)* %a.gep
551 %b = load volatile float, float addrspace(1)* %b.gep
552 %min = call float @llvm.maxnum.f32(float %a, float %b)
553 %fneg = fsub float -0.000000e+00, %min
554 store float %fneg, float addrspace(1)* %out.gep
558 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
559 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
560 ; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
561 ; GCN: buffer_store_dword [[RESULT]]
562 define amdgpu_kernel void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
563 %tid = call i32 @llvm.amdgcn.workitem.id.x()
564 %tid.ext = sext i32 %tid to i64
565 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
566 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
567 %a = load volatile float, float addrspace(1)* %a.gep
568 %min = call float @llvm.maxnum.f32(float %a, float %a)
569 %min.fneg = fsub float -0.0, %min
570 store float %min.fneg, float addrspace(1)* %out.gep
574 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
575 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
576 ; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
577 ; GCN: buffer_store_dword [[RESULT]]
578 define amdgpu_kernel void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
579 %tid = call i32 @llvm.amdgcn.workitem.id.x()
580 %tid.ext = sext i32 %tid to i64
581 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
582 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
583 %a = load volatile float, float addrspace(1)* %a.gep
584 %min = call float @llvm.maxnum.f32(float 4.0, float %a)
585 %fneg = fsub float -0.000000e+00, %min
586 store float %fneg, float addrspace(1)* %out.gep
590 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
591 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
592 ; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
593 ; GCN: buffer_store_dword [[RESULT]]
594 define amdgpu_kernel void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
595 %tid = call i32 @llvm.amdgcn.workitem.id.x()
596 %tid.ext = sext i32 %tid to i64
597 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
598 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
599 %a = load volatile float, float addrspace(1)* %a.gep
600 %min = call float @llvm.maxnum.f32(float -4.0, float %a)
601 %fneg = fsub float -0.000000e+00, %min
602 store float %fneg, float addrspace(1)* %out.gep
606 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
607 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
608 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
609 ; GCN: buffer_store_dword [[RESULT]]
610 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
611 %tid = call i32 @llvm.amdgcn.workitem.id.x()
612 %tid.ext = sext i32 %tid to i64
613 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
614 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
615 %a = load volatile float, float addrspace(1)* %a.gep
616 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
617 %fneg = fsub float -0.000000e+00, %max
618 store float %fneg, float addrspace(1)* %out.gep
622 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32:
623 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
624 ; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
625 ; GCN: buffer_store_dword [[RESULT]]
626 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
627 %tid = call i32 @llvm.amdgcn.workitem.id.x()
628 %tid.ext = sext i32 %tid to i64
629 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
630 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
631 %a = load volatile float, float addrspace(1)* %a.gep
632 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
633 %fneg = fsub float -0.000000e+00, %max
634 store float %fneg, float addrspace(1)* %out.gep
638 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32:
639 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
640 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
641 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
642 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
643 ; GCN: buffer_store_dword [[RESULT]]
644 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
645 %tid = call i32 @llvm.amdgcn.workitem.id.x()
646 %tid.ext = sext i32 %tid to i64
647 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
648 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
649 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
650 %a = load volatile float, float addrspace(1)* %a.gep
651 %b = load volatile float, float addrspace(1)* %b.gep
652 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
653 %fneg = fsub float -0.000000e+00, %max
654 %mul = fmul float %fneg, %b
655 store float %mul, float addrspace(1)* %out.gep
659 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
660 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
661 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
662 ; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
663 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
664 ; GCN-NEXT: buffer_store_dword [[MAX0]]
665 ; GCN-NEXT: buffer_store_dword [[MUL1]]
666 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
667 %tid = call i32 @llvm.amdgcn.workitem.id.x()
668 %tid.ext = sext i32 %tid to i64
669 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
670 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
671 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
672 %a = load volatile float, float addrspace(1)* %a.gep
673 %b = load volatile float, float addrspace(1)* %b.gep
674 %min = call float @llvm.maxnum.f32(float %a, float %b)
675 %fneg = fsub float -0.000000e+00, %min
676 %use1 = fmul float %min, 4.0
677 store volatile float %fneg, float addrspace(1)* %out
678 store volatile float %use1, float addrspace(1)* %out
682 ; --------------------------------------------------------------------------------
684 ; --------------------------------------------------------------------------------
686 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
687 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
688 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
689 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
691 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
692 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
694 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
695 ; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
696 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
697 %tid = call i32 @llvm.amdgcn.workitem.id.x()
698 %tid.ext = sext i32 %tid to i64
699 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
700 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
701 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
702 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
703 %a = load volatile float, float addrspace(1)* %a.gep
704 %b = load volatile float, float addrspace(1)* %b.gep
705 %c = load volatile float, float addrspace(1)* %c.gep
706 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
707 %fneg = fsub float -0.000000e+00, %fma
708 store float %fneg, float addrspace(1)* %out.gep
712 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
713 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
714 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
715 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
716 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
717 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
718 ; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
719 ; GCN-NEXT: buffer_store_dword [[FMA]]
720 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
721 %tid = call i32 @llvm.amdgcn.workitem.id.x()
722 %tid.ext = sext i32 %tid to i64
723 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
724 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
725 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
726 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
727 %a = load volatile float, float addrspace(1)* %a.gep
728 %b = load volatile float, float addrspace(1)* %b.gep
729 %c = load volatile float, float addrspace(1)* %c.gep
730 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
731 %fneg = fsub float -0.000000e+00, %fma
732 store volatile float %fneg, float addrspace(1)* %out
733 store volatile float %fma, float addrspace(1)* %out
737 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
738 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
739 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
740 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
742 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
743 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
744 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
746 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
747 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
749 ; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
750 ; GCN-NEXT: buffer_store_dword [[MUL]]
751 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
752 %tid = call i32 @llvm.amdgcn.workitem.id.x()
753 %tid.ext = sext i32 %tid to i64
754 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
755 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
756 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
757 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
758 %a = load volatile float, float addrspace(1)* %a.gep
759 %b = load volatile float, float addrspace(1)* %b.gep
760 %c = load volatile float, float addrspace(1)* %c.gep
761 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
762 %fneg = fsub float -0.000000e+00, %fma
763 %use1 = fmul float %fma, 4.0
764 store volatile float %fneg, float addrspace(1)* %out
765 store volatile float %use1, float addrspace(1)* %out
769 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
770 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
771 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
772 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
774 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
775 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
777 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
778 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
779 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
780 %tid = call i32 @llvm.amdgcn.workitem.id.x()
781 %tid.ext = sext i32 %tid to i64
782 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
783 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
784 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
785 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
786 %a = load volatile float, float addrspace(1)* %a.gep
787 %b = load volatile float, float addrspace(1)* %b.gep
788 %c = load volatile float, float addrspace(1)* %c.gep
789 %fneg.a = fsub float -0.000000e+00, %a
790 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
791 %fneg = fsub float -0.000000e+00, %fma
792 store volatile float %fneg, float addrspace(1)* %out
796 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
797 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
798 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
799 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
801 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
802 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
804 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
805 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
806 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
807 %tid = call i32 @llvm.amdgcn.workitem.id.x()
808 %tid.ext = sext i32 %tid to i64
809 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
810 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
811 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
812 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
813 %a = load volatile float, float addrspace(1)* %a.gep
814 %b = load volatile float, float addrspace(1)* %b.gep
815 %c = load volatile float, float addrspace(1)* %c.gep
816 %fneg.b = fsub float -0.000000e+00, %b
817 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
818 %fneg = fsub float -0.000000e+00, %fma
819 store volatile float %fneg, float addrspace(1)* %out
823 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
824 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
825 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
826 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
828 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
829 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
831 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
832 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
833 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
834 %tid = call i32 @llvm.amdgcn.workitem.id.x()
835 %tid.ext = sext i32 %tid to i64
836 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
837 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
838 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
839 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
840 %a = load volatile float, float addrspace(1)* %a.gep
841 %b = load volatile float, float addrspace(1)* %b.gep
842 %c = load volatile float, float addrspace(1)* %c.gep
843 %fneg.a = fsub float -0.000000e+00, %a
844 %fneg.b = fsub float -0.000000e+00, %b
845 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
846 %fneg = fsub float -0.000000e+00, %fma
847 store volatile float %fneg, float addrspace(1)* %out
851 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
852 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
853 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
854 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
856 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
857 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
859 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
860 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
861 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
862 %tid = call i32 @llvm.amdgcn.workitem.id.x()
863 %tid.ext = sext i32 %tid to i64
864 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
865 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
866 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
867 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
868 %a = load volatile float, float addrspace(1)* %a.gep
869 %b = load volatile float, float addrspace(1)* %b.gep
870 %c = load volatile float, float addrspace(1)* %c.gep
871 %fneg.a = fsub float -0.000000e+00, %a
872 %fneg.c = fsub float -0.000000e+00, %c
873 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
874 %fneg = fsub float -0.000000e+00, %fma
875 store volatile float %fneg, float addrspace(1)* %out
879 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
880 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
881 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
882 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
884 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
885 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
887 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
888 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
889 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
890 %tid = call i32 @llvm.amdgcn.workitem.id.x()
891 %tid.ext = sext i32 %tid to i64
892 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
893 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
894 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
895 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
896 %a = load volatile float, float addrspace(1)* %a.gep
897 %b = load volatile float, float addrspace(1)* %b.gep
898 %c = load volatile float, float addrspace(1)* %c.gep
899 %fneg.c = fsub float -0.000000e+00, %c
900 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
901 %fneg = fsub float -0.000000e+00, %fma
902 store volatile float %fneg, float addrspace(1)* %out
906 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
907 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
908 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
909 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
911 ; GCN-SAFE: v_xor_b32
912 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
913 ; GCN-SAFE: v_xor_b32
915 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
916 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
917 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
918 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
919 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
920 %tid = call i32 @llvm.amdgcn.workitem.id.x()
921 %tid.ext = sext i32 %tid to i64
922 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
923 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
924 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
925 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
926 %a = load volatile float, float addrspace(1)* %a.gep
927 %b = load volatile float, float addrspace(1)* %b.gep
928 %c = load volatile float, float addrspace(1)* %c.gep
929 %fneg.a = fsub float -0.000000e+00, %a
930 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
931 %fneg = fsub float -0.000000e+00, %fma
932 store volatile float %fneg, float addrspace(1)* %out
933 store volatile float %fneg.a, float addrspace(1)* %out
937 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
938 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
939 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
940 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
942 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
943 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
944 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
946 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
947 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_FMA]]
948 ; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
949 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
950 %tid = call i32 @llvm.amdgcn.workitem.id.x()
951 %tid.ext = sext i32 %tid to i64
952 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
953 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
954 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
955 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
956 %a = load volatile float, float addrspace(1)* %a.gep
957 %b = load volatile float, float addrspace(1)* %b.gep
958 %c = load volatile float, float addrspace(1)* %c.gep
959 %fneg.a = fsub float -0.000000e+00, %a
960 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
961 %fneg = fsub float -0.000000e+00, %fma
962 %use1 = fmul float %fneg.a, %d
963 store volatile float %fneg, float addrspace(1)* %out
964 store volatile float %use1, float addrspace(1)* %out
968 ; --------------------------------------------------------------------------------
970 ; --------------------------------------------------------------------------------
972 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
973 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
974 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
975 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
977 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
978 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
980 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
981 ; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
982 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
983 %tid = call i32 @llvm.amdgcn.workitem.id.x()
984 %tid.ext = sext i32 %tid to i64
985 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
986 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
987 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
988 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
989 %a = load volatile float, float addrspace(1)* %a.gep
990 %b = load volatile float, float addrspace(1)* %b.gep
991 %c = load volatile float, float addrspace(1)* %c.gep
992 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
993 %fneg = fsub float -0.000000e+00, %fma
994 store float %fneg, float addrspace(1)* %out.gep
998 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
999 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1000 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1001 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1003 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1004 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1005 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1007 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]]
1008 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1010 ; GCN: buffer_store_dword [[NEG_MAD]]
1011 ; GCN-NEXT: buffer_store_dword [[MUL]]
1012 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1013 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1014 %tid.ext = sext i32 %tid to i64
1015 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1016 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1017 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1018 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1019 %a = load volatile float, float addrspace(1)* %a.gep
1020 %b = load volatile float, float addrspace(1)* %b.gep
1021 %c = load volatile float, float addrspace(1)* %c.gep
1022 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1023 %fneg = fsub float -0.000000e+00, %fma
1024 %use1 = fmul float %fma, 4.0
1025 store volatile float %fneg, float addrspace(1)* %out
1026 store volatile float %use1, float addrspace(1)* %out
1030 ; --------------------------------------------------------------------------------
1032 ; --------------------------------------------------------------------------------
1034 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1035 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1036 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1037 ; GCN: buffer_store_dwordx2 [[RESULT]]
1038 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1039 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1040 %tid.ext = sext i32 %tid to i64
1041 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1042 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1043 %a = load volatile float, float addrspace(1)* %a.gep
1044 %fpext = fpext float %a to double
1045 %fneg = fsub double -0.000000e+00, %fpext
1046 store double %fneg, double addrspace(1)* %out.gep
1050 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1051 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1052 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1053 ; GCN: buffer_store_dwordx2 [[RESULT]]
1054 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1055 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1056 %tid.ext = sext i32 %tid to i64
1057 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1058 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1059 %a = load volatile float, float addrspace(1)* %a.gep
1060 %fneg.a = fsub float -0.000000e+00, %a
1061 %fpext = fpext float %fneg.a to double
1062 %fneg = fsub double -0.000000e+00, %fpext
1063 store double %fneg, double addrspace(1)* %out.gep
1067 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1068 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1069 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1070 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1071 ; GCN: buffer_store_dwordx2 [[RESULT]]
1072 ; GCN: buffer_store_dword [[FNEG_A]]
1073 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1074 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1075 %tid.ext = sext i32 %tid to i64
1076 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1077 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1078 %a = load volatile float, float addrspace(1)* %a.gep
1079 %fneg.a = fsub float -0.000000e+00, %a
1080 %fpext = fpext float %fneg.a to double
1081 %fneg = fsub double -0.000000e+00, %fpext
1082 store volatile double %fneg, double addrspace(1)* %out.gep
1083 store volatile float %fneg.a, float addrspace(1)* undef
1087 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1088 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1089 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1090 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1091 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1092 ; GCN: buffer_store_dwordx2 v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1093 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1094 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1095 %tid.ext = sext i32 %tid to i64
1096 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1097 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1098 %a = load volatile float, float addrspace(1)* %a.gep
1099 %fpext = fpext float %a to double
1100 %fneg = fsub double -0.000000e+00, %fpext
1101 store volatile double %fneg, double addrspace(1)* %out.gep
1102 store volatile double %fpext, double addrspace(1)* undef
1106 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1107 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1108 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1109 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1110 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1111 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1112 ; GCN: buffer_store_dwordx2 [[MUL]]
1113 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1114 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1115 %tid.ext = sext i32 %tid to i64
1116 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1117 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1118 %a = load volatile float, float addrspace(1)* %a.gep
1119 %fpext = fpext float %a to double
1120 %fneg = fsub double -0.000000e+00, %fpext
1121 %mul = fmul double %fpext, 4.0
1122 store volatile double %fneg, double addrspace(1)* %out.gep
1123 store volatile double %mul, double addrspace(1)* %out.gep
1127 ; FIXME: Source modifiers not folded for f16->f32
1128 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1129 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1130 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1131 %tid.ext = sext i32 %tid to i64
1132 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1133 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1134 %a = load volatile half, half addrspace(1)* %a.gep
1135 %fpext = fpext half %a to float
1136 %fneg = fsub float -0.000000e+00, %fpext
1137 store volatile float %fneg, float addrspace(1)* %out.gep
1138 store volatile float %fpext, float addrspace(1)* %out.gep
1142 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1143 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1145 %tid.ext = sext i32 %tid to i64
1146 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1147 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1148 %a = load volatile half, half addrspace(1)* %a.gep
1149 %fpext = fpext half %a to float
1150 %fneg = fsub float -0.000000e+00, %fpext
1151 %mul = fmul float %fpext, 4.0
1152 store volatile float %fneg, float addrspace(1)* %out.gep
1153 store volatile float %mul, float addrspace(1)* %out.gep
1157 ; --------------------------------------------------------------------------------
1159 ; --------------------------------------------------------------------------------
1161 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1162 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1163 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1164 ; GCN: buffer_store_dword [[RESULT]]
1165 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1166 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1167 %tid.ext = sext i32 %tid to i64
1168 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1169 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1170 %a = load volatile double, double addrspace(1)* %a.gep
1171 %fpround = fptrunc double %a to float
1172 %fneg = fsub float -0.000000e+00, %fpround
1173 store float %fneg, float addrspace(1)* %out.gep
1177 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1178 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1179 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1180 ; GCN: buffer_store_dword [[RESULT]]
1181 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1182 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1183 %tid.ext = sext i32 %tid to i64
1184 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1185 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1186 %a = load volatile double, double addrspace(1)* %a.gep
1187 %fneg.a = fsub double -0.000000e+00, %a
1188 %fpround = fptrunc double %fneg.a to float
1189 %fneg = fsub float -0.000000e+00, %fpround
1190 store float %fneg, float addrspace(1)* %out.gep
1194 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1195 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1196 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1197 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1198 ; GCN: buffer_store_dword [[RESULT]]
1199 ; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1200 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1202 %tid.ext = sext i32 %tid to i64
1203 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1204 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1205 %a = load volatile double, double addrspace(1)* %a.gep
1206 %fneg.a = fsub double -0.000000e+00, %a
1207 %fpround = fptrunc double %fneg.a to float
1208 %fneg = fsub float -0.000000e+00, %fpround
1209 store volatile float %fneg, float addrspace(1)* %out.gep
1210 store volatile double %fneg.a, double addrspace(1)* undef
1214 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1215 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1216 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1217 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1218 ; GCN: buffer_store_dword [[RESULT]]
1219 ; GCN: buffer_store_dwordx2 [[USE1]]
1220 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1221 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1222 %tid.ext = sext i32 %tid to i64
1223 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1224 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1225 %a = load volatile double, double addrspace(1)* %a.gep
1226 %fneg.a = fsub double -0.000000e+00, %a
1227 %fpround = fptrunc double %fneg.a to float
1228 %fneg = fsub float -0.000000e+00, %fpround
1229 %use1 = fmul double %fneg.a, %c
1230 store volatile float %fneg, float addrspace(1)* %out.gep
1231 store volatile double %use1, double addrspace(1)* undef
1235 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1236 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1237 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1238 ; GCN: buffer_store_short [[RESULT]]
1239 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1240 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1241 %tid.ext = sext i32 %tid to i64
1242 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1243 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1244 %a = load volatile float, float addrspace(1)* %a.gep
1245 %fpround = fptrunc float %a to half
1246 %fneg = fsub half -0.000000e+00, %fpround
1247 store half %fneg, half addrspace(1)* %out.gep
1251 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1252 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1253 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1254 ; GCN: buffer_store_short [[RESULT]]
1255 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1256 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1257 %tid.ext = sext i32 %tid to i64
1258 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1259 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1260 %a = load volatile float, float addrspace(1)* %a.gep
1261 %fneg.a = fsub float -0.000000e+00, %a
1262 %fpround = fptrunc float %fneg.a to half
1263 %fneg = fsub half -0.000000e+00, %fpround
1264 store half %fneg, half addrspace(1)* %out.gep
1268 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1269 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1270 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1271 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1272 ; GCN: buffer_store_dword [[NEG]]
1273 ; GCN: buffer_store_dword [[CVT]]
1274 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1275 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1276 %tid.ext = sext i32 %tid to i64
1277 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1278 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1279 %a = load volatile double, double addrspace(1)* %a.gep
1280 %fpround = fptrunc double %a to float
1281 %fneg = fsub float -0.000000e+00, %fpround
1282 store volatile float %fneg, float addrspace(1)* %out.gep
1283 store volatile float %fpround, float addrspace(1)* %out.gep
1287 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1288 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1289 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1290 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1291 ; GCN: buffer_store_short [[RESULT]]
1292 ; GCN: buffer_store_dword [[NEG_A]]
1293 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1294 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1295 %tid.ext = sext i32 %tid to i64
1296 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1297 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1298 %a = load volatile float, float addrspace(1)* %a.gep
1299 %fneg.a = fsub float -0.000000e+00, %a
1300 %fpround = fptrunc float %fneg.a to half
1301 %fneg = fsub half -0.000000e+00, %fpround
1302 store volatile half %fneg, half addrspace(1)* %out.gep
1303 store volatile float %fneg.a, float addrspace(1)* undef
1307 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1308 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1309 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1310 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1311 ; GCN: buffer_store_short [[RESULT]]
1312 ; GCN: buffer_store_dword [[USE1]]
1313 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1314 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1315 %tid.ext = sext i32 %tid to i64
1316 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1317 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1318 %a = load volatile float, float addrspace(1)* %a.gep
1319 %fneg.a = fsub float -0.000000e+00, %a
1320 %fpround = fptrunc float %fneg.a to half
1321 %fneg = fsub half -0.000000e+00, %fpround
1322 %use1 = fmul float %fneg.a, %c
1323 store volatile half %fneg, half addrspace(1)* %out.gep
1324 store volatile float %use1, float addrspace(1)* undef
1328 ; --------------------------------------------------------------------------------
1330 ; --------------------------------------------------------------------------------
1332 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1333 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1334 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1335 ; GCN: buffer_store_dword [[RESULT]]
1336 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1337 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1338 %tid.ext = sext i32 %tid to i64
1339 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1340 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1341 %a = load volatile float, float addrspace(1)* %a.gep
1342 %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1343 %fneg = fsub float -0.000000e+00, %rcp
1344 store float %fneg, float addrspace(1)* %out.gep
1348 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1349 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1350 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1351 ; GCN: buffer_store_dword [[RESULT]]
1352 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1353 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1354 %tid.ext = sext i32 %tid to i64
1355 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1356 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1357 %a = load volatile float, float addrspace(1)* %a.gep
1358 %fneg.a = fsub float -0.000000e+00, %a
1359 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1360 %fneg = fsub float -0.000000e+00, %rcp
1361 store float %fneg, float addrspace(1)* %out.gep
1365 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1366 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1367 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1368 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1369 ; GCN: buffer_store_dword [[RESULT]]
1370 ; GCN: buffer_store_dword [[NEG_A]]
1371 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1372 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1373 %tid.ext = sext i32 %tid to i64
1374 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1375 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1376 %a = load volatile float, float addrspace(1)* %a.gep
1377 %fneg.a = fsub float -0.000000e+00, %a
1378 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1379 %fneg = fsub float -0.000000e+00, %rcp
1380 store volatile float %fneg, float addrspace(1)* %out.gep
1381 store volatile float %fneg.a, float addrspace(1)* undef
1385 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1386 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1387 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1388 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1389 ; GCN: buffer_store_dword [[RESULT]]
1390 ; GCN: buffer_store_dword [[MUL]]
1391 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1392 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1393 %tid.ext = sext i32 %tid to i64
1394 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1395 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1396 %a = load volatile float, float addrspace(1)* %a.gep
1397 %fneg.a = fsub float -0.000000e+00, %a
1398 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1399 %fneg = fsub float -0.000000e+00, %rcp
1400 %use1 = fmul float %fneg.a, %c
1401 store volatile float %fneg, float addrspace(1)* %out.gep
1402 store volatile float %use1, float addrspace(1)* undef
1406 ; --------------------------------------------------------------------------------
1408 ; --------------------------------------------------------------------------------
1410 ; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
1411 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1412 ; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1413 ; GCN: buffer_store_dword [[RESULT]]
1414 define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1415 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1416 %tid.ext = sext i32 %tid to i64
1417 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1418 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1419 %a = load volatile float, float addrspace(1)* %a.gep
1420 %rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
1421 %fneg = fsub float -0.000000e+00, %rcp
1422 store float %fneg, float addrspace(1)* %out.gep
1426 ; --------------------------------------------------------------------------------
1428 ; --------------------------------------------------------------------------------
1430 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1431 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1432 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1433 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1434 ; GCN-NEXT: buffer_store_dword [[RESULT]]
1435 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1436 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1437 %tid.ext = sext i32 %tid to i64
1438 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1439 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1440 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1441 %a = load volatile float, float addrspace(1)* %a.gep
1442 %b = load volatile float, float addrspace(1)* %b.gep
1443 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1444 %fneg = fsub float -0.000000e+00, %mul
1445 store float %fneg, float addrspace(1)* %out.gep
1449 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1450 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1451 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1452 ; GCN-DAG: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
1453 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[MUL]]
1454 ; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1455 ; GCN: buffer_store_dword [[MUL]]
1456 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1457 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1458 %tid.ext = sext i32 %tid to i64
1459 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1460 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1461 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1462 %a = load volatile float, float addrspace(1)* %a.gep
1463 %b = load volatile float, float addrspace(1)* %b.gep
1464 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1465 %fneg = fsub float -0.000000e+00, %mul
1466 store volatile float %fneg, float addrspace(1)* %out
1467 store volatile float %mul, float addrspace(1)* %out
1471 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1472 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1473 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1474 ; GCN: v_mul_legacy_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
1475 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[MUL0]], 4.0
1476 ; GCN-NEXT: buffer_store_dword [[MUL0]]
1477 ; GCN-NEXT: buffer_store_dword [[MUL]]
1478 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1479 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1480 %tid.ext = sext i32 %tid to i64
1481 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1482 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1483 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1484 %a = load volatile float, float addrspace(1)* %a.gep
1485 %b = load volatile float, float addrspace(1)* %b.gep
1486 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1487 %fneg = fsub float -0.000000e+00, %mul
1488 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1489 store volatile float %fneg, float addrspace(1)* %out
1490 store volatile float %use1, float addrspace(1)* %out
1494 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1495 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1496 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1497 ; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
1498 ; GCN-NEXT: buffer_store_dword [[MUL]]
1499 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1500 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1501 %tid.ext = sext i32 %tid to i64
1502 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1503 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1504 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1505 %a = load volatile float, float addrspace(1)* %a.gep
1506 %b = load volatile float, float addrspace(1)* %b.gep
1507 %fneg.a = fsub float -0.000000e+00, %a
1508 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1509 %fneg = fsub float -0.000000e+00, %mul
1510 store volatile float %fneg, float addrspace(1)* %out
1514 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1515 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1516 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1517 ; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
1518 ; GCN-NEXT: buffer_store_dword [[MUL]]
1519 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1520 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1521 %tid.ext = sext i32 %tid to i64
1522 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1523 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1524 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1525 %a = load volatile float, float addrspace(1)* %a.gep
1526 %b = load volatile float, float addrspace(1)* %b.gep
1527 %fneg.b = fsub float -0.000000e+00, %b
1528 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1529 %fneg = fsub float -0.000000e+00, %mul
1530 store volatile float %fneg, float addrspace(1)* %out
1534 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1535 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1536 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1537 ; GCN: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
1538 ; GCN-NEXT: buffer_store_dword [[MUL]]
1539 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1540 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1541 %tid.ext = sext i32 %tid to i64
1542 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1543 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1544 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1545 %a = load volatile float, float addrspace(1)* %a.gep
1546 %b = load volatile float, float addrspace(1)* %b.gep
1547 %fneg.a = fsub float -0.000000e+00, %a
1548 %fneg.b = fsub float -0.000000e+00, %b
1549 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1550 %fneg = fsub float -0.000000e+00, %mul
1551 store volatile float %fneg, float addrspace(1)* %out
1555 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1556 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1557 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1558 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1559 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1560 ; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1561 ; GCN: buffer_store_dword [[NEG_A]]
1562 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1563 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1564 %tid.ext = sext i32 %tid to i64
1565 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1566 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1567 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1568 %a = load volatile float, float addrspace(1)* %a.gep
1569 %b = load volatile float, float addrspace(1)* %b.gep
1570 %fneg.a = fsub float -0.000000e+00, %a
1571 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1572 %fneg = fsub float -0.000000e+00, %mul
1573 store volatile float %fneg, float addrspace(1)* %out
1574 store volatile float %fneg.a, float addrspace(1)* %out
1578 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1579 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1580 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1581 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1582 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1583 ; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1584 ; GCN: buffer_store_dword [[MUL]]
1585 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1586 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1587 %tid.ext = sext i32 %tid to i64
1588 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1589 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1590 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1591 %a = load volatile float, float addrspace(1)* %a.gep
1592 %b = load volatile float, float addrspace(1)* %b.gep
1593 %fneg.a = fsub float -0.000000e+00, %a
1594 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1595 %fneg = fsub float -0.000000e+00, %mul
1596 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
1597 store volatile float %fneg, float addrspace(1)* %out
1598 store volatile float %use1, float addrspace(1)* %out
1602 ; --------------------------------------------------------------------------------
1604 ; --------------------------------------------------------------------------------
1606 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
1607 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1608 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
1609 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1610 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1611 ; GCN: buffer_store_dword [[RESULT]]
1612 define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1613 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1614 %tid.ext = sext i32 %tid to i64
1615 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1616 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1617 %a = load volatile float, float addrspace(1)* %a.gep
1618 %sin = call float @llvm.sin.f32(float %a)
1619 %fneg = fsub float -0.000000e+00, %sin
1620 store float %fneg, float addrspace(1)* %out.gep
1624 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1625 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1626 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1627 ; GCN: buffer_store_dword [[RESULT]]
1628 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1629 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1630 %tid.ext = sext i32 %tid to i64
1631 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1632 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1633 %a = load volatile float, float addrspace(1)* %a.gep
1634 %sin = call float @llvm.amdgcn.sin.f32(float %a)
1635 %fneg = fsub float -0.0, %sin
1636 store float %fneg, float addrspace(1)* %out.gep
1640 ; --------------------------------------------------------------------------------
1642 ; --------------------------------------------------------------------------------
1644 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
1645 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1646 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1647 ; GCN: buffer_store_dword [[RESULT]]
1648 define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1649 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1650 %tid.ext = sext i32 %tid to i64
1651 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1652 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1653 %a = load volatile float, float addrspace(1)* %a.gep
1654 %trunc = call float @llvm.trunc.f32(float %a)
1655 %fneg = fsub float -0.0, %trunc
1656 store float %fneg, float addrspace(1)* %out.gep
1660 ; --------------------------------------------------------------------------------
1662 ; --------------------------------------------------------------------------------
1664 ; GCN-LABEL: {{^}}v_fneg_round_f32:
1665 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1666 ; GCN: v_trunc_f32_e32
1667 ; GCN: v_sub_f32_e32
1668 ; GCN: v_cndmask_b32
1670 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
1671 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
1673 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
1674 ; GCN: buffer_store_dword [[RESULT]]
1675 define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1676 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1677 %tid.ext = sext i32 %tid to i64
1678 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1679 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1680 %a = load volatile float, float addrspace(1)* %a.gep
1681 %round = call float @llvm.round.f32(float %a)
1682 %fneg = fsub float -0.0, %round
1683 store float %fneg, float addrspace(1)* %out.gep
1687 ; --------------------------------------------------------------------------------
1689 ; --------------------------------------------------------------------------------
1691 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
1692 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1693 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1694 ; GCN: buffer_store_dword [[RESULT]]
1695 define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1696 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1697 %tid.ext = sext i32 %tid to i64
1698 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1699 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1700 %a = load volatile float, float addrspace(1)* %a.gep
1701 %rint = call float @llvm.rint.f32(float %a)
1702 %fneg = fsub float -0.0, %rint
1703 store float %fneg, float addrspace(1)* %out.gep
1707 ; --------------------------------------------------------------------------------
1709 ; --------------------------------------------------------------------------------
1711 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
1712 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1713 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1714 ; GCN: buffer_store_dword [[RESULT]]
1715 define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1716 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1717 %tid.ext = sext i32 %tid to i64
1718 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1719 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1720 %a = load volatile float, float addrspace(1)* %a.gep
1721 %nearbyint = call float @llvm.nearbyint.f32(float %a)
1722 %fneg = fsub float -0.0, %nearbyint
1723 store float %fneg, float addrspace(1)* %out.gep
1727 ; --------------------------------------------------------------------------------
1729 ; --------------------------------------------------------------------------------
1731 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
1732 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1733 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1734 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
1735 ; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
1736 ; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
1737 define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1738 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1739 %tid.ext = sext i32 %tid to i64
1740 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1741 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1742 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1743 %a = load volatile float, float addrspace(1)* %a.gep
1744 %b = load volatile float, float addrspace(1)* %b.gep
1745 %mul = fmul float %a, %b
1746 %fneg = fsub float -0.0, %mul
1747 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
1748 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
1749 store volatile float %intrp0, float addrspace(1)* %out.gep
1750 store volatile float %intrp1, float addrspace(1)* %out.gep
1754 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
1755 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1756 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1757 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
1758 ; GCN: v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
1759 ; GCN: v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
1760 define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1761 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1762 %tid.ext = sext i32 %tid to i64
1763 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1764 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1765 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1766 %a = load volatile float, float addrspace(1)* %a.gep
1767 %b = load volatile float, float addrspace(1)* %b.gep
1768 %mul = fmul float %a, %b
1769 %fneg = fsub float -0.0, %mul
1770 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
1771 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
1772 store volatile float %intrp0, float addrspace(1)* %out.gep
1773 store volatile float %intrp1, float addrspace(1)* %out.gep
1777 ; --------------------------------------------------------------------------------
1779 ; --------------------------------------------------------------------------------
1781 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
1782 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1783 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1784 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1785 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
1786 ; GCN: s_cbranch_scc1
1788 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
1789 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
1790 ; GCN: buffer_store_dword [[MUL1]]
1792 ; GCN: buffer_store_dword [[MUL0]]
1793 define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
1794 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1795 %tid.ext = sext i32 %tid to i64
1796 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1797 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1798 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1799 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1800 %a = load volatile float, float addrspace(1)* %a.gep
1801 %b = load volatile float, float addrspace(1)* %b.gep
1802 %c = load volatile float, float addrspace(1)* %c.gep
1803 %mul = fmul float %a, %b
1804 %fneg = fsub float -0.0, %mul
1805 %cmp0 = icmp eq i32 %d, 0
1806 br i1 %cmp0, label %if, label %endif
1809 %mul1 = fmul float %fneg, %c
1810 store volatile float %mul1, float addrspace(1)* %out.gep
1814 store volatile float %mul, float addrspace(1)* %out.gep
1818 ; --------------------------------------------------------------------------------
1820 ; --------------------------------------------------------------------------------
1822 ; Can't fold into use, so should fold into source
1823 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
1824 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1825 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1826 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
1827 ; GCN: ; use [[MUL]]
1828 ; GCN: buffer_store_dword [[MUL]]
1829 define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
1830 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1831 %tid.ext = sext i32 %tid to i64
1832 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1833 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1834 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1835 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1836 %a = load volatile float, float addrspace(1)* %a.gep
1837 %b = load volatile float, float addrspace(1)* %b.gep
1838 %c = load volatile float, float addrspace(1)* %c.gep
1839 %mul = fmul float %a, %b
1840 %fneg = fsub float -0.0, %mul
1841 call void asm sideeffect "; use $0", "v"(float %fneg) #0
1842 store volatile float %fneg, float addrspace(1)* %out.gep
1846 ; --------------------------------------------------------------------------------
1848 ; --------------------------------------------------------------------------------
1850 ; Can't fold into the inline asm use, and the source mul has another user, so a separate xor materializes the negation
1851 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
1852 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1853 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1854 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
1855 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
1856 ; GCN: ; use [[NEG]]
1857 ; GCN: buffer_store_dword [[MUL]]
1858 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
1859 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1860 %tid.ext = sext i32 %tid to i64
1861 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1862 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1863 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1864 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1865 %a = load volatile float, float addrspace(1)* %a.gep
1866 %b = load volatile float, float addrspace(1)* %b.gep
1867 %c = load volatile float, float addrspace(1)* %c.gep
1868 %mul = fmul float %a, %b
1869 %fneg = fsub float -0.0, %mul
1870 call void asm sideeffect "; use $0", "v"(float %fneg) #0
1871 store volatile float %mul, float addrspace(1)* %out.gep
1875 ; --------------------------------------------------------------------------------
1876 ; code size regression tests
1877 ; --------------------------------------------------------------------------------
1879 ; There are multiple users of the fneg that must use a VOP3
1880 ; instruction, so there is no penalty
1881 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
1882 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1883 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1884 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1886 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
1887 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
1888 ; GCN-NEXT: buffer_store_dword [[FMA0]]
1889 ; GCN-NEXT: buffer_store_dword [[FMA1]]
1890 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
; Both users of %fneg.a are fma calls (VOP3-encoded), where a source negate
; modifier is free, so the CHECK lines expect -[[A]] folded into both v_fma_f32s
; with no standalone xor.
1891 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1892 %tid.ext = sext i32 %tid to i64
1893 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1894 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1895 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1896 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1897 %a = load volatile float, float addrspace(1)* %a.gep
1898 %b = load volatile float, float addrspace(1)* %b.gep
1899 %c = load volatile float, float addrspace(1)* %c.gep
; One fneg feeding two fma users.
1901 %fneg.a = fsub float -0.0, %a
1902 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1903 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
; Both results go to the base %out pointer (not %out.gep); volatile keeps both
; stores live.
1905 store volatile float %fma0, float addrspace(1)* %out
1906 store volatile float %fma1, float addrspace(1)* %out
1910 ; There are multiple users, but both require using a larger encoding
1913 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
1914 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1915 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1916 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1918 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
1919 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
1920 ; GCN-NEXT: buffer_store_dword [[MUL0]]
1921 ; GCN-NEXT: buffer_store_dword [[MUL1]]
1922 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
; Both users are fmuls that would normally be VOP2; folding the negate forces
; each into the larger VOP3 encoding (v_mul_f32_e64 in the CHECK lines), but
; since *both* users pay that cost anyway, the fneg is still folded into each.
1923 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1924 %tid.ext = sext i32 %tid to i64
1925 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1926 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1927 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1928 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1929 %a = load volatile float, float addrspace(1)* %a.gep
1930 %b = load volatile float, float addrspace(1)* %b.gep
1931 %c = load volatile float, float addrspace(1)* %c.gep
; One fneg feeding two fmul users.
1933 %fneg.a = fsub float -0.0, %a
1934 %mul0 = fmul float %fneg.a, %b
1935 %mul1 = fmul float %fneg.a, %c
; Stores target the base %out pointer (not %out.gep); volatile keeps both live.
1937 store volatile float %mul0, float addrspace(1)* %out
1938 store volatile float %mul1, float addrspace(1)* %out
1942 ; One user is VOP3 so has no cost to folding the modifier, the other does.
1943 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
1944 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1945 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1946 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1948 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
1949 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
1951 ; GCN: buffer_store_dword [[FMA0]]
1952 ; GCN-NEXT: buffer_store_dword [[MUL1]]
1953 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
; Mixed users: the fma (VOP3) absorbs the negate for free, while the fmul must
; be promoted to v_mul_f32_e64 to carry it — the fneg is still folded into both
; rather than materialized as an xor.
1954 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1955 %tid.ext = sext i32 %tid to i64
1956 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1957 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1958 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1959 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1960 %a = load volatile float, float addrspace(1)* %a.gep
1961 %b = load volatile float, float addrspace(1)* %b.gep
1962 %c = load volatile float, float addrspace(1)* %c.gep
; One fneg with an fma user and an fmul user.
1964 %fneg.a = fsub float -0.0, %a
1965 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
1966 %mul1 = fmul float %fneg.a, %c
; Stores target the base %out pointer (not %out.gep); volatile keeps both live.
1968 store volatile float %fma0, float addrspace(1)* %out
1969 store volatile float %mul1, float addrspace(1)* %out
1973 ; The use of the fneg requires a code size increase, but folding into
1974 ; the source does not
1976 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
1977 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1978 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1979 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1980 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
1982 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
1983 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
1984 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
1986 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
1987 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
1988 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
1990 ; GCN: buffer_store_dword [[MUL1]]
1991 ; GCN-NEXT: buffer_store_dword [[MUL2]]
1992 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
; Folding the fneg into either fmul user costs encoding size (VOP2 -> VOP3),
; but folding it backwards into the fma source is free. Safe mode keeps the
; negates on the muls; with -enable-no-signed-zeros-fp-math the CHECK lines
; expect fma(a, -b, -2.0) followed by plain VOP2 muls.
1993 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1994 %tid.ext = sext i32 %tid to i64
1995 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1996 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1997 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1998 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
1999 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2000 %a = load volatile float, float addrspace(1)* %a.gep
2001 %b = load volatile float, float addrspace(1)* %b.gep
2002 %c = load volatile float, float addrspace(1)* %c.gep
2003 %d = load volatile float, float addrspace(1)* %d.gep
; fneg of the fma result, consumed by two fmuls.
2005 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2006 %fneg.fma0 = fsub float -0.0, %fma0
2007 %mul1 = fmul float %fneg.fma0, %c
2008 %mul2 = fmul float %fneg.fma0, %d
; Stores target the base %out pointer (not %out.gep); volatile keeps both live.
2010 store volatile float %mul1, float addrspace(1)* %out
2011 store volatile float %mul2, float addrspace(1)* %out
2015 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2016 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2017 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2018 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2019 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2021 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2022 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2023 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2025 ; GCN: buffer_store_dwordx2 [[MUL0]]
2026 ; GCN: buffer_store_dwordx2 [[MUL1]]
2027 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
; f64 variant of the test above. v_mul_f64 is always VOP3, so the source
; negate modifier is free on both users and the CHECK lines expect
; -[[FMA0]] folded into each v_mul_f64 in both safe and nsz modes.
2028 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2029 %tid.ext = sext i32 %tid to i64
2030 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2031 %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2032 %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2033 %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2034 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2035 %a = load volatile double, double addrspace(1)* %a.gep
2036 %b = load volatile double, double addrspace(1)* %b.gep
2037 %c = load volatile double, double addrspace(1)* %c.gep
2038 %d = load volatile double, double addrspace(1)* %d.gep
; fneg of the fma result, consumed by two fmuls.
2040 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2041 %fneg.fma0 = fsub double -0.0, %fma0
2042 %mul1 = fmul double %fneg.fma0, %c
2043 %mul2 = fmul double %fneg.fma0, %d
; Stores target the base %out pointer (not %out.gep); volatile keeps both live.
2045 store volatile double %mul1, double addrspace(1)* %out
2046 store volatile double %mul2, double addrspace(1)* %out
2050 ; %trunc.a has one fneg use, but it requires a code size increase and
2051 ; the fneg can instead be folded for free into the fma.
2053 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2054 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2055 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2056 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2057 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2058 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2059 ; GCN: buffer_store_dword [[FMA0]]
2060 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
; Single fneg user: folding backwards into v_trunc would cost encoding size,
; but the fma user (VOP3) absorbs the negate for free — CHECK lines expect
; -[[TRUNC_A]] as an fma source modifier.
2061 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2062 %tid.ext = sext i32 %tid to i64
2063 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2064 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2065 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2066 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2067 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
; %d is loaded but otherwise unused (kept for its volatile side effect).
2068 %a = load volatile float, float addrspace(1)* %a.gep
2069 %b = load volatile float, float addrspace(1)* %b.gep
2070 %c = load volatile float, float addrspace(1)* %c.gep
2071 %d = load volatile float, float addrspace(1)* %d.gep
; fneg(trunc(a)) feeding a single fma.
2073 %trunc.a = call float @llvm.trunc.f32(float %a)
2074 %trunc.fneg.a = fsub float -0.0, %trunc.a
2075 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
; Store targets the base %out pointer (not %out.gep).
2076 store volatile float %fma0, float addrspace(1)* %out
2080 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2081 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2082 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2083 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2084 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2085 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2086 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2087 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2088 ; GCN: buffer_store_dword [[FMA0]]
2089 ; GCN: buffer_store_dword [[MUL1]]
2090 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
; %trunc.a is multi-use (negated for the fma, un-negated for the mul), so the
; trunc result must stay materialized; CHECK lines expect the negate only as a
; source modifier on the fma, with the mul using the positive value.
2091 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2092 %tid.ext = sext i32 %tid to i64
2093 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2094 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2095 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2096 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2097 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2098 %a = load volatile float, float addrspace(1)* %a.gep
2099 %b = load volatile float, float addrspace(1)* %b.gep
2100 %c = load volatile float, float addrspace(1)* %c.gep
2101 %d = load volatile float, float addrspace(1)* %d.gep
; trunc result used both negated (fma) and directly (mul).
2103 %trunc.a = call float @llvm.trunc.f32(float %a)
2104 %trunc.fneg.a = fsub float -0.0, %trunc.a
2105 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2106 %mul1 = fmul float %trunc.a, %d
; Stores target the base %out pointer (not %out.gep); volatile keeps both live.
2107 store volatile float %fma0, float addrspace(1)* %out
2108 store volatile float %mul1, float addrspace(1)* %out
2112 declare i32 @llvm.amdgcn.workitem.id.x() #1
2113 declare float @llvm.fma.f32(float, float, float) #1
2114 declare float @llvm.fmuladd.f32(float, float, float) #1
2115 declare float @llvm.sin.f32(float) #1
2116 declare float @llvm.trunc.f32(float) #1
2117 declare float @llvm.round.f32(float) #1
2118 declare float @llvm.rint.f32(float) #1
2119 declare float @llvm.nearbyint.f32(float) #1
2120 declare float @llvm.minnum.f32(float, float) #1
2121 declare float @llvm.maxnum.f32(float, float) #1
2123 declare double @llvm.fma.f64(double, double, double) #1
2125 declare float @llvm.amdgcn.sin.f32(float) #1
2126 declare float @llvm.amdgcn.rcp.f32(float) #1
2127 declare float @llvm.amdgcn.rcp.legacy(float) #1
2128 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2129 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2130 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2132 attributes #0 = { nounwind }
2133 attributes #1 = { nounwind readnone }