; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s

; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
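; The GCN-SAFE runs check the conservative lowering, where the fneg of a
; result is materialized as a sign-bit xor; the -enable-no-signed-zeros-fp-math
; (GCN-NSZ) runs check that the fneg is instead folded into the operands of the
; source instruction in the cases where that is only legal without signed
; zeros.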
; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------
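; fneg (fadd x, y) can only fold to an fadd/fsub with negated operands when
; signed zeros are ignored: for x = +0.0, y = -0.0 we have -(x + y) = -0.0,
; but (-x) + (-y) = +0.0.
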
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------
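; fneg (fmul x, y) folds to fmul x, (fneg y) without any fast-math flags: the
; sign of a product is the xor of the operand signs, so the fold is exact even
; for signed zeros. Hence the checks below use the plain GCN prefix, which
; applies to both the SAFE and NSZ runs.
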
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
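; fneg (fminnum x, y) folds to fmaxnum (fneg x), (fneg y): negation reverses
; the ordering and the NaN handling of minnum/maxnum is symmetric, so the fold
; needs no fast-math flags; the checks below use the plain GCN prefix and
; expect v_max with negated source modifiers.
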
; GCN-LABEL: {{^}}v_fneg_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  store float %min.fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
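; 1/(2*pi) is an inline immediate on VI but not on SI, so in the following
; tests SI must first materialize the constant in a register while VI can fold
; 0.15915494 directly into the min/max.
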
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; SI: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[K]]

; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[A]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MIN]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
; SI: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[K]]

; VI: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0.15915494

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_min_f16_e32 [[MIN:v[0-9]+]], 0.15915494, [[A]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MIN]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e64 [[RESULT:v[0-9]+]], -[[A]], 0.15915494

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, -[[A]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], 0.15915494

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI: v_max_f32_e64 [[MIN:v[0-9]+]], -[[A]], [[K]]
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]

; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[A]]
; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fsub float -0.000000e+00, %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
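; fneg (fmaxnum x, y) is the mirror image of the fminnum case above: it folds
; to fminnum (fneg x), (fneg y), checked below as v_min with negated source
; modifiers.
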
; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fsub float -0.0, %max
  store float %max.fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MIN0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MIN0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %max
  %use1 = fmul float %max, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------
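; fneg (fma x, y, z) folds to fma x, (fneg y), (fneg z) only under nsz: the
; addend behaves like an fadd operand, so the signed-zero counterexample from
; the fadd section applies to the fused add as well.
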
; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]

; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fma, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]

; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_xor_b32
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
; GCN-SAFE: v_xor_b32

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]

; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmuladd tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]

; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]

; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
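; Only the high dword of the extended result needs its sign bit flipped; the
; unnegated cvt pair is reused as-is for the second store.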
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %fpext, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  %mul = fmul double %fpext, 4.0
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %mul, double addrspace(1)* %out.gep
  ret void
}

; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fsub float -0.000000e+00, %fpext
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpext, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fsub float -0.000000e+00, %fpext
  %mul = fmul float %fpext, 4.0
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fp_round tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
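; The fneg of the source and the fneg of the result cancel, so the conversion
; needs no source modifier.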
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %fneg.a, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  %use1 = fmul double %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %use1, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpround = fptrunc float %a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpround, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
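; fneg(rcp(fneg(x))) collapses to rcp(x): the negates cancel through the rcp.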
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
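; llvm.sin.f32 is lowered through a scale and v_fract before v_sin; the fneg
; is folded by negating the scale constant (0xbe22f983 is -1/(2*pi)).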
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fsub float -0.000000e+00, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fsub float -0.0, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
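; llvm.round has no dedicated instruction: it expands through v_trunc, v_sub
; and v_cndmask, so only the final add can absorb the fneg (it becomes a sub
; with a negated operand under nsz).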
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fsub float -0.0, %round
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fsub float -0.0, %rint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fsub float -0.0, %nearbyint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
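; fcanonicalize is lowered as a multiply by 1.0, so folding the fneg just
; flips that constant to -1.0.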
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
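; The interpolation operand cannot carry a source modifier, so the fneg is
; folded back into the multiply that feeds both interp instructions.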
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; copytoreg tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc0

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]

; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]

define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %cmp0 = icmp eq i32 %d, 0
  br i1 %cmp0, label %if, label %endif

if:
  %mul1 = fmul float %fneg, %c
  store volatile float %mul1, float addrspace(1)* %out.gep
  br label %endif

endif:
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inline asm tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inline asm use tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------

; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
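; (v_fma_f32 uses the VOP3 encoding, which always has input modifier fields,
; so negating the operand in both fmas adds no encoding bytes.)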
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; There are multiple users, but both require using a larger encoding
; for the modifier.
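; (each v_mul_f32 must switch to the VOP3 form, v_mul_f32_e64, to encode the
; -src input modifier, so every use pays the size cost.)
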
; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %mul0 = fmul float %fneg.a, %b
  %mul1 = fmul float %fneg.a, %c

  store volatile float %mul0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

; One user is VOP3 so has no cost to folding the modifier, the other does.
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
  %mul1 = fmul float %fneg.a, %c

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

2164 ; The use of the fneg requires a code size increase, but folding into
2165 ; the source does not
2167 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2168 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2169 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2170 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2171 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2173 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2174 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2175 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2177 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2178 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2179 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2181 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2182 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
  %fneg.fma0 = fsub float -0.0, %fma0
  %mul1 = fmul float %fneg.fma0, %c
  %mul2 = fmul float %fneg.fma0, %d

  store volatile float %mul1, float addrspace(1)* %out
  store volatile float %mul2, float addrspace(1)* %out
  ret void
}
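
; f64 variant of the test above. v_mul_f64 is VOP3-only, so (presumably for
; that reason) the source modifier is already free on the muls and the fneg
; is not pushed into the fma in either the safe or the nsz run.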

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]

; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %b = load volatile double, double addrspace(1)* %b.gep
  %c = load volatile double, double addrspace(1)* %c.gep
  %d = load volatile double, double addrspace(1)* %d.gep

  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
  %fneg.fma0 = fsub double -0.0, %fma0
  %mul1 = fmul double %fneg.fma0, %c
  %mul2 = fmul double %fneg.fma0, %d

  store volatile double %mul1, double addrspace(1)* %out
  store volatile double %mul2, double addrspace(1)* %out
  ret void
}

; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.
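; A sketch of the cost comparison (assuming standard GCN encodings, with
; hypothetical registers): v_trunc_f32 is VOP1 and has no modifier bits, so
; absorbing the fneg there would force the 8-byte v_trunc_f32_e64 form,
; whereas the VOP3 fma encodes the negation at no cost:
;   v_trunc_f32_e32 v0, v1
;   v_fma_f32       v2, -v0, v3, v4   ; fneg folded here for free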

; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  store volatile float %fma0, float addrspace(1)* %out
  ret void
}
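
; %trunc.a has multiple uses; only the fma can absorb the fneg for free via
; its source modifier, so the negation is folded there and the VOP2 mul
; keeps the unnegated truncated value.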

; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  %mul1 = fmul float %trunc.a, %d
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare half @llvm.minnum.f16(half, half) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }