1 ; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
5 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
7 ; --------------------------------------------------------------------------------
9 ; --------------------------------------------------------------------------------
11 ; GCN-LABEL: {{^}}v_fneg_add_f32:
12 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
15 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
16 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
18 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
19 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Computes -(a + b): the negation is written as fsub from -0.0 applied to an fadd.
; (Function terminator lines are not visible in this chunk of the file.)
20 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
21 %tid = call i32 @llvm.amdgcn.workitem.id.x()
22 %tid.ext = sext i32 %tid to i64
23 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
24 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
25 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
26 %a = load volatile float, float addrspace(1)* %a.gep
27 %b = load volatile float, float addrspace(1)* %b.gep
28 %add = fadd float %a, %b
29 %fneg = fsub float -0.000000e+00, %add
30 store float %fneg, float addrspace(1)* %out.gep
34 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
35 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
36 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
37 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
38 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
40 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -(a + b) while also storing the un-negated add, so both values stay live.
41 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
42 %tid = call i32 @llvm.amdgcn.workitem.id.x()
43 %tid.ext = sext i32 %tid to i64
44 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
45 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
46 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
47 %a = load volatile float, float addrspace(1)* %a.gep
48 %b = load volatile float, float addrspace(1)* %b.gep
49 %add = fadd float %a, %b
50 %fneg = fsub float -0.000000e+00, %add
51 store volatile float %fneg, float addrspace(1)* %out
52 store volatile float %add, float addrspace(1)* %out
56 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
57 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
58 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
60 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
61 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
62 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
64 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
65 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
67 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
68 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; Computes -(a + b) where the add result also feeds an fmul by 4.0; both results are stored.
69 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
70 %tid = call i32 @llvm.amdgcn.workitem.id.x()
71 %tid.ext = sext i32 %tid to i64
72 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
73 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
74 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
75 %a = load volatile float, float addrspace(1)* %a.gep
76 %b = load volatile float, float addrspace(1)* %b.gep
77 %add = fadd float %a, %b
78 %fneg = fsub float -0.000000e+00, %add
79 %use1 = fmul float %add, 4.0
80 store volatile float %fneg, float addrspace(1)* %out
81 store volatile float %use1, float addrspace(1)* %out
85 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
86 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
87 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
89 ; GCN-SAFE: v_sub_f32_e32
90 ; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
92 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
94 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -((-a) + b); the outer and inner negations can cancel into a plain subtract.
95 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
96 %tid = call i32 @llvm.amdgcn.workitem.id.x()
97 %tid.ext = sext i32 %tid to i64
98 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
99 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
100 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
101 %a = load volatile float, float addrspace(1)* %a.gep
102 %b = load volatile float, float addrspace(1)* %b.gep
103 %fneg.a = fsub float -0.000000e+00, %a
104 %add = fadd float %fneg.a, %b
105 %fneg = fsub float -0.000000e+00, %add
106 store volatile float %fneg, float addrspace(1)* %out
110 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
111 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
112 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
114 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
115 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
117 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
118 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -(a + (-b)); mirror of the previous case with the negated operand on the right.
120 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
121 %tid = call i32 @llvm.amdgcn.workitem.id.x()
122 %tid.ext = sext i32 %tid to i64
123 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
124 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
125 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
126 %a = load volatile float, float addrspace(1)* %a.gep
127 %b = load volatile float, float addrspace(1)* %b.gep
128 %fneg.b = fsub float -0.000000e+00, %b
129 %add = fadd float %a, %fneg.b
130 %fneg = fsub float -0.000000e+00, %add
131 store volatile float %fneg, float addrspace(1)* %out
134 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
136 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
138 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
139 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
141 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
142 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -((-a) + (-b)); all three negations can collapse back to a plain add.
143 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145 %tid.ext = sext i32 %tid to i64
146 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
147 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
148 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
149 %a = load volatile float, float addrspace(1)* %a.gep
150 %b = load volatile float, float addrspace(1)* %b.gep
151 %fneg.a = fsub float -0.000000e+00, %a
152 %fneg.b = fsub float -0.000000e+00, %b
153 %add = fadd float %fneg.a, %fneg.b
154 %fneg = fsub float -0.000000e+00, %add
155 store volatile float %fneg, float addrspace(1)* %out
159 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
160 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
161 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
165 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
166 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
168 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
169 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
170 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
171 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; Computes -((-a) + b) while also storing -a, so the inner negation has a second use.
172 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
174 %tid.ext = sext i32 %tid to i64
175 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
176 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
177 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
178 %a = load volatile float, float addrspace(1)* %a.gep
179 %b = load volatile float, float addrspace(1)* %b.gep
180 %fneg.a = fsub float -0.000000e+00, %a
181 %add = fadd float %fneg.a, %b
182 %fneg = fsub float -0.000000e+00, %add
183 store volatile float %fneg, float addrspace(1)* %out
184 store volatile float %fneg.a, float addrspace(1)* %out
188 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
192 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
193 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
194 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
196 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
197 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
198 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
199 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; Computes -((-a) + b) where -a also feeds an fmul by the scalar argument %c.
200 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
202 %tid.ext = sext i32 %tid to i64
203 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
204 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
205 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
206 %a = load volatile float, float addrspace(1)* %a.gep
207 %b = load volatile float, float addrspace(1)* %b.gep
208 %fneg.a = fsub float -0.000000e+00, %a
209 %add = fadd float %fneg.a, %b
210 %fneg = fsub float -0.000000e+00, %add
211 %use1 = fmul float %fneg.a, %c
212 store volatile float %fneg, float addrspace(1)* %out
213 store volatile float %use1, float addrspace(1)* %out
217 ; This one asserted with -enable-no-signed-zeros-fp-math
218 ; GCN-LABEL: {{^}}fneg_fadd_0:
219 ; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
220 ; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
221 ; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
222 ; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
223 ; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
224 ; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
225 ; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
226 ; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
; Regression case: per the comment above, this input asserted under
; -enable-no-signed-zeros-fp-math. It negates (0*(0*(1/x)) + 0) and runs the
; result through two compare/select chains. A line of this body (between the
; inner numbers 228 and 230) is not visible in this chunk.
228 define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
230 %tmp7 = fdiv float 1.000000e+00, %tmp6
231 %tmp8 = fmul float 0.000000e+00, %tmp7
232 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
233 %.i188 = fadd float %tmp9, 0.000000e+00
234 %tmp10 = fcmp uge float %.i188, %tmp2
235 %tmp11 = fsub float -0.000000e+00, %.i188
236 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
237 %tmp12 = fcmp ule float %.i092, 0.000000e+00
238 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
242 ; --------------------------------------------------------------------------------
244 ; --------------------------------------------------------------------------------
246 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
247 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
248 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
249 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
250 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Computes -(a * b); the negation can fold into one multiply operand.
251 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
252 %tid = call i32 @llvm.amdgcn.workitem.id.x()
253 %tid.ext = sext i32 %tid to i64
254 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
255 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
256 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
257 %a = load volatile float, float addrspace(1)* %a.gep
258 %b = load volatile float, float addrspace(1)* %b.gep
259 %mul = fmul float %a, %b
260 %fneg = fsub float -0.000000e+00, %mul
261 store float %fneg, float addrspace(1)* %out.gep
265 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
266 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
267 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
268 ; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
269 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
270 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
271 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -(a * b) while also storing the un-negated multiply.
272 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
273 %tid = call i32 @llvm.amdgcn.workitem.id.x()
274 %tid.ext = sext i32 %tid to i64
275 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
276 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
277 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
278 %a = load volatile float, float addrspace(1)* %a.gep
279 %b = load volatile float, float addrspace(1)* %b.gep
280 %mul = fmul float %a, %b
281 %fneg = fsub float -0.000000e+00, %mul
282 store volatile float %fneg, float addrspace(1)* %out
283 store volatile float %mul, float addrspace(1)* %out
287 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
288 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
289 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
290 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
291 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
293 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
294 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; Computes -(a * b) where the multiply result also feeds an fmul by 4.0.
295 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
296 %tid = call i32 @llvm.amdgcn.workitem.id.x()
297 %tid.ext = sext i32 %tid to i64
298 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
299 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
300 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
301 %a = load volatile float, float addrspace(1)* %a.gep
302 %b = load volatile float, float addrspace(1)* %b.gep
303 %mul = fmul float %a, %b
304 %fneg = fsub float -0.000000e+00, %mul
305 %use1 = fmul float %mul, 4.0
306 store volatile float %fneg, float addrspace(1)* %out
307 store volatile float %use1, float addrspace(1)* %out
311 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
312 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
313 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
314 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
315 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -((-a) * b); the two negations cancel into a plain multiply.
316 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
317 %tid = call i32 @llvm.amdgcn.workitem.id.x()
318 %tid.ext = sext i32 %tid to i64
319 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
320 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
321 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
322 %a = load volatile float, float addrspace(1)* %a.gep
323 %b = load volatile float, float addrspace(1)* %b.gep
324 %fneg.a = fsub float -0.000000e+00, %a
325 %mul = fmul float %fneg.a, %b
326 %fneg = fsub float -0.000000e+00, %mul
327 store volatile float %fneg, float addrspace(1)* %out
331 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
332 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
333 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
334 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
335 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -(a * (-b)); mirror of the previous case with the negated operand on the right.
336 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
337 %tid = call i32 @llvm.amdgcn.workitem.id.x()
338 %tid.ext = sext i32 %tid to i64
339 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
340 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
341 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
342 %a = load volatile float, float addrspace(1)* %a.gep
343 %b = load volatile float, float addrspace(1)* %b.gep
344 %fneg.b = fsub float -0.000000e+00, %b
345 %mul = fmul float %a, %fneg.b
346 %fneg = fsub float -0.000000e+00, %mul
347 store volatile float %fneg, float addrspace(1)* %out
351 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
352 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
353 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
354 ; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
355 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; Computes -((-a) * (-b)); an odd number of negations remains after folding.
356 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
357 %tid = call i32 @llvm.amdgcn.workitem.id.x()
358 %tid.ext = sext i32 %tid to i64
359 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
360 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
361 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
362 %a = load volatile float, float addrspace(1)* %a.gep
363 %b = load volatile float, float addrspace(1)* %b.gep
364 %fneg.a = fsub float -0.000000e+00, %a
365 %fneg.b = fsub float -0.000000e+00, %b
366 %mul = fmul float %fneg.a, %fneg.b
367 %fneg = fsub float -0.000000e+00, %mul
368 store volatile float %fneg, float addrspace(1)* %out
372 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
373 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
374 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
375 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
376 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
378 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
379 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; Computes -((-a) * b) while also storing -a, giving the inner negation a second use.
380 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
381 %tid = call i32 @llvm.amdgcn.workitem.id.x()
382 %tid.ext = sext i32 %tid to i64
383 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
384 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
385 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
386 %a = load volatile float, float addrspace(1)* %a.gep
387 %b = load volatile float, float addrspace(1)* %b.gep
388 %fneg.a = fsub float -0.000000e+00, %a
389 %mul = fmul float %fneg.a, %b
390 %fneg = fsub float -0.000000e+00, %mul
391 store volatile float %fneg, float addrspace(1)* %out
392 store volatile float %fneg.a, float addrspace(1)* %out
396 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
397 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
398 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
399 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
400 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
401 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
402 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; Computes -((-a) * b) where -a also feeds an fmul by the scalar argument %c.
403 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
404 %tid = call i32 @llvm.amdgcn.workitem.id.x()
405 %tid.ext = sext i32 %tid to i64
406 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
407 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
408 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
409 %a = load volatile float, float addrspace(1)* %a.gep
410 %b = load volatile float, float addrspace(1)* %b.gep
411 %fneg.a = fsub float -0.000000e+00, %a
412 %mul = fmul float %fneg.a, %b
413 %fneg = fsub float -0.000000e+00, %mul
414 %use1 = fmul float %fneg.a, %c
415 store volatile float %fneg, float addrspace(1)* %out
416 store volatile float %use1, float addrspace(1)* %out
420 ; --------------------------------------------------------------------------------
422 ; --------------------------------------------------------------------------------
424 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
425 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
426 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
427 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
428 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
429 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
430 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates the result of llvm.minnum.f32(a, b) and stores it (kernel, so default FP mode applies).
431 define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
432 %tid = call i32 @llvm.amdgcn.workitem.id.x()
433 %tid.ext = sext i32 %tid to i64
434 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
435 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
436 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
437 %a = load volatile float, float addrspace(1)* %a.gep
438 %b = load volatile float, float addrspace(1)* %b.gep
439 %min = call float @llvm.minnum.f32(float %a, float %b)
440 %fneg = fsub float -0.000000e+00, %min
441 store float %fneg, float addrspace(1)* %out.gep
445 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
448 ; GCN: v_max_f32_e64 v0, -v0, -v1
; Same -(minnum(a, b)) computation as above, but as a pixel-shader entry point.
450 define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
451 %min = call float @llvm.minnum.f32(float %a, float %b)
452 %fneg = fsub float -0.000000e+00, %min
456 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
457 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
458 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
459 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
460 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(a, a) — both intrinsic operands are the same loaded value.
461 define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
462 %tid = call i32 @llvm.amdgcn.workitem.id.x()
463 %tid.ext = sext i32 %tid to i64
464 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
465 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
466 %a = load volatile float, float addrspace(1)* %a.gep
467 %min = call float @llvm.minnum.f32(float %a, float %a)
468 %min.fneg = fsub float -0.0, %min
469 store float %min.fneg, float addrspace(1)* %out.gep
473 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
475 ; GCN: v_max_f32_e64 v0, -v0, -v0
; Negates minnum(a, a) as a pixel-shader entry point.
477 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
478 %min = call float @llvm.minnum.f32(float %a, float %a)
479 %min.fneg = fsub float -0.0, %min
483 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
484 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
485 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
486 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
487 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(4.0, a) — positive inline-constant operand.
488 define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
489 %tid = call i32 @llvm.amdgcn.workitem.id.x()
490 %tid.ext = sext i32 %tid to i64
491 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
492 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
493 %a = load volatile float, float addrspace(1)* %a.gep
494 %min = call float @llvm.minnum.f32(float 4.0, float %a)
495 %fneg = fsub float -0.000000e+00, %min
496 store float %fneg, float addrspace(1)* %out.gep
500 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
502 ; GCN: v_max_f32_e64 v0, -v0, -4.0
; Negates minnum(4.0, a) as a pixel-shader entry point.
504 define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
505 %min = call float @llvm.minnum.f32(float 4.0, float %a)
506 %fneg = fsub float -0.000000e+00, %min
510 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
511 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
512 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
513 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
514 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(-4.0, a) — negative inline-constant operand.
515 define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
516 %tid = call i32 @llvm.amdgcn.workitem.id.x()
517 %tid.ext = sext i32 %tid to i64
518 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
519 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
520 %a = load volatile float, float addrspace(1)* %a.gep
521 %min = call float @llvm.minnum.f32(float -4.0, float %a)
522 %fneg = fsub float -0.000000e+00, %min
523 store float %fneg, float addrspace(1)* %out.gep
527 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
529 ; GCN: v_max_f32_e64 v0, -v0, 4.0
; Negates minnum(-4.0, a) as a pixel-shader entry point.
531 define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
532 %min = call float @llvm.minnum.f32(float -4.0, float %a)
533 %fneg = fsub float -0.000000e+00, %min
537 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
538 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
539 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
540 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(0.0, a) — positive-zero constant operand.
541 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
542 %tid = call i32 @llvm.amdgcn.workitem.id.x()
543 %tid.ext = sext i32 %tid to i64
544 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
545 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
546 %a = load volatile float, float addrspace(1)* %a.gep
547 %min = call float @llvm.minnum.f32(float 0.0, float %a)
548 %fneg = fsub float -0.000000e+00, %min
549 store float %fneg, float addrspace(1)* %out.gep
553 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
554 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
555 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
556 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
557 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(-0.0, a) — negative-zero constant operand.
558 define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
559 %tid = call i32 @llvm.amdgcn.workitem.id.x()
560 %tid.ext = sext i32 %tid to i64
561 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
562 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
563 %a = load volatile float, float addrspace(1)* %a.gep
564 %min = call float @llvm.minnum.f32(float -0.0, float %a)
565 %fneg = fsub float -0.000000e+00, %min
566 store float %fneg, float addrspace(1)* %out.gep
570 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
571 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
573 ; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
574 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
576 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
577 ; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
578 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
580 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(1/(2*pi), a); 0x3FC45F3060000000 is 1/(2*pi) as an f32-valued double literal.
581 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
582 %tid = call i32 @llvm.amdgcn.workitem.id.x()
583 %tid.ext = sext i32 %tid to i64
584 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
585 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
586 %a = load volatile float, float addrspace(1)* %a.gep
587 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
588 %fneg = fsub float -0.000000e+00, %min
589 store float %fneg, float addrspace(1)* %out.gep
593 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
594 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
596 ; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
597 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
599 ; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
600 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
602 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Negates minnum(-1/(2*pi), a) — the negated form of the constant above.
603 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
604 %tid = call i32 @llvm.amdgcn.workitem.id.x()
605 %tid.ext = sext i32 %tid to i64
606 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
607 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
608 %a = load volatile float, float addrspace(1)* %a.gep
609 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
610 %fneg = fsub float -0.000000e+00, %min
611 store float %fneg, float addrspace(1)* %out.gep
615 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
616 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
618 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
619 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
620 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
622 ; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
623 ; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
624 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
626 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Half-precision variant: negates minnum(0xH3118, a); 0xH3118 is the f16 encoding of 1/(2*pi).
627 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
628 %tid = call i32 @llvm.amdgcn.workitem.id.x()
629 %tid.ext = sext i32 %tid to i64
630 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
631 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
632 %a = load volatile half, half addrspace(1)* %a.gep
633 %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
634 %fneg = fsub half -0.000000e+00, %min
635 store half %fneg, half addrspace(1)* %out.gep
639 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
640 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
642 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
643 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
644 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
646 ; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
647 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
649 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
650 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
651 %tid = call i32 @llvm.amdgcn.workitem.id.x()
652 %tid.ext = sext i32 %tid to i64
653 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
654 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
655 %a = load volatile half, half addrspace(1)* %a.gep
656 %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
657 %fneg = fsub half -0.000000e+00, %min
658 store half %fneg, half addrspace(1)* %out.gep
662 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
663 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
665 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
666 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
667 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
668 ; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
670 ; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
671 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
673 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
674 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
675 %tid = call i32 @llvm.amdgcn.workitem.id.x()
676 %tid.ext = sext i32 %tid to i64
677 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
678 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
679 %a = load volatile double, double addrspace(1)* %a.gep
680 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
681 %fneg = fsub double -0.000000e+00, %min
682 store double %fneg, double addrspace(1)* %out.gep
686 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
687 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
689 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
690 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
691 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
692 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
694 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
695 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
697 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
698 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
699 %tid = call i32 @llvm.amdgcn.workitem.id.x()
700 %tid.ext = sext i32 %tid to i64
701 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
702 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
703 %a = load volatile double, double addrspace(1)* %a.gep
704 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
705 %fneg = fsub double -0.000000e+00, %min
706 store double %fneg, double addrspace(1)* %out.gep
710 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
712 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
714 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
715 %min = call float @llvm.minnum.f32(float -0.0, float %a)
716 %fneg = fsub float -0.000000e+00, %min
720 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
721 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
722 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
723 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
724 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
725 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
726 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
727 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
728 %tid = call i32 @llvm.amdgcn.workitem.id.x()
729 %tid.ext = sext i32 %tid to i64
730 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
731 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
732 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
733 %a = load volatile float, float addrspace(1)* %a.gep
734 %b = load volatile float, float addrspace(1)* %b.gep
735 %min = call float @llvm.minnum.f32(float 0.0, float %a)
736 %fneg = fsub float -0.000000e+00, %min
737 %mul = fmul float %fneg, %b
738 store float %mul, float addrspace(1)* %out.gep
742 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
743 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
744 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
746 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
748 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
749 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
751 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
752 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
753 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
755 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
756 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
757 %tid = call i32 @llvm.amdgcn.workitem.id.x()
758 %tid.ext = sext i32 %tid to i64
759 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
760 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
761 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
762 %a = load volatile float, float addrspace(1)* %a.gep
763 %b = load volatile float, float addrspace(1)* %b.gep
764 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
765 %fneg = fsub float -0.000000e+00, %min
766 %mul = fmul float %fneg, %b
767 store float %mul, float addrspace(1)* %out.gep
771 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
774 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
775 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
777 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
778 %min = call float @llvm.minnum.f32(float 0.0, float %a)
779 %fneg = fsub float -0.000000e+00, %min
780 %mul = fmul float %fneg, %b
784 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
785 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
786 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
787 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
788 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
789 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
790 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
791 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
792 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
793 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
794 %tid = call i32 @llvm.amdgcn.workitem.id.x()
795 %tid.ext = sext i32 %tid to i64
796 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
797 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
798 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
799 %a = load volatile float, float addrspace(1)* %a.gep
800 %b = load volatile float, float addrspace(1)* %b.gep
801 %min = call float @llvm.minnum.f32(float %a, float %b)
802 %fneg = fsub float -0.000000e+00, %min
803 %use1 = fmul float %min, 4.0
804 store volatile float %fneg, float addrspace(1)* %out
805 store volatile float %use1, float addrspace(1)* %out
809 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
812 ; GCN: v_max_f32_e64 v0, -v0, -v1
813 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
815 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
816 %min = call float @llvm.minnum.f32(float %a, float %b)
817 %fneg = fsub float -0.000000e+00, %min
818 %use1 = fmul float %min, 4.0
819 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
820 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
821 ret <2 x float> %ins1
824 ; --------------------------------------------------------------------------------
826 ; --------------------------------------------------------------------------------
829 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
830 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
831 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
832 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
833 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
834 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
835 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
836 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
837 %tid = call i32 @llvm.amdgcn.workitem.id.x()
838 %tid.ext = sext i32 %tid to i64
839 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
840 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
841 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
842 %a = load volatile float, float addrspace(1)* %a.gep
843 %b = load volatile float, float addrspace(1)* %b.gep
844 %max = call float @llvm.maxnum.f32(float %a, float %b)
845 %fneg = fsub float -0.000000e+00, %max
846 store float %fneg, float addrspace(1)* %out.gep
850 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
853 ; GCN: v_min_f32_e64 v0, -v0, -v1
855 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
856 %max = call float @llvm.maxnum.f32(float %a, float %b)
857 %fneg = fsub float -0.000000e+00, %max
861 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
862 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
863 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
864 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
865 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
866 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
867 %tid = call i32 @llvm.amdgcn.workitem.id.x()
868 %tid.ext = sext i32 %tid to i64
869 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
870 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
871 %a = load volatile float, float addrspace(1)* %a.gep
872 %max = call float @llvm.maxnum.f32(float %a, float %a)
873 %max.fneg = fsub float -0.0, %max
874 store float %max.fneg, float addrspace(1)* %out.gep
878 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
880 ; GCN: v_min_f32_e64 v0, -v0, -v0
882 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
883 %max = call float @llvm.maxnum.f32(float %a, float %a)
884 %max.fneg = fsub float -0.0, %max
888 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
889 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
890 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
891 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
892 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
893 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
894 %tid = call i32 @llvm.amdgcn.workitem.id.x()
895 %tid.ext = sext i32 %tid to i64
896 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
897 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
898 %a = load volatile float, float addrspace(1)* %a.gep
899 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
900 %fneg = fsub float -0.000000e+00, %max
901 store float %fneg, float addrspace(1)* %out.gep
905 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
907 ; GCN: v_min_f32_e64 v0, -v0, -4.0
909 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
910 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
911 %fneg = fsub float -0.000000e+00, %max
915 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
916 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
917 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
918 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
919 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
920 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
921 %tid = call i32 @llvm.amdgcn.workitem.id.x()
922 %tid.ext = sext i32 %tid to i64
923 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
924 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
925 %a = load volatile float, float addrspace(1)* %a.gep
926 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
927 %fneg = fsub float -0.000000e+00, %max
928 store float %fneg, float addrspace(1)* %out.gep
932 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
934 ; GCN: v_min_f32_e64 v0, -v0, 4.0
936 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
937 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
938 %fneg = fsub float -0.000000e+00, %max
942 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
943 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
944 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
945 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
946 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
947 %tid = call i32 @llvm.amdgcn.workitem.id.x()
948 %tid.ext = sext i32 %tid to i64
949 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
950 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
951 %a = load volatile float, float addrspace(1)* %a.gep
952 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
953 %fneg = fsub float -0.000000e+00, %max
954 store float %fneg, float addrspace(1)* %out.gep
958 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
959 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
960 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
961 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
962 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
963 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
964 %tid = call i32 @llvm.amdgcn.workitem.id.x()
965 %tid.ext = sext i32 %tid to i64
966 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
967 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
968 %a = load volatile float, float addrspace(1)* %a.gep
969 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
970 %fneg = fsub float -0.000000e+00, %max
971 store float %fneg, float addrspace(1)* %out.gep
975 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
977 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
979 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
980 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
981 %fneg = fsub float -0.000000e+00, %max
985 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
986 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
987 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
988 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
989 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
990 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
991 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
992 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
993 %tid = call i32 @llvm.amdgcn.workitem.id.x()
994 %tid.ext = sext i32 %tid to i64
995 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
996 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
997 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
998 %a = load volatile float, float addrspace(1)* %a.gep
999 %b = load volatile float, float addrspace(1)* %b.gep
1000 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1001 %fneg = fsub float -0.000000e+00, %max
1002 %mul = fmul float %fneg, %b
1003 store float %mul, float addrspace(1)* %out.gep
1007 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1010 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
1011 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
1012 ; GCN-NEXT: ; return
1013 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
1014 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1015 %fneg = fsub float -0.000000e+00, %max
1016 %mul = fmul float %fneg, %b
1020 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1021 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1022 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1023 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
1024 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1025 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1026 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1027 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1028 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1029 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1030 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1031 %tid.ext = sext i32 %tid to i64
1032 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1033 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1034 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1035 %a = load volatile float, float addrspace(1)* %a.gep
1036 %b = load volatile float, float addrspace(1)* %b.gep
1037 %max = call float @llvm.maxnum.f32(float %a, float %b)
1038 %fneg = fsub float -0.000000e+00, %max
1039 %use1 = fmul float %max, 4.0
1040 store volatile float %fneg, float addrspace(1)* %out
1041 store volatile float %use1, float addrspace(1)* %out
1045 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1048 ; GCN: v_min_f32_e64 v0, -v0, -v1
1049 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1050 ; GCN-NEXT: ; return
1051 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1052 %max = call float @llvm.maxnum.f32(float %a, float %b)
1053 %fneg = fsub float -0.000000e+00, %max
1054 %use1 = fmul float %max, 4.0
1055 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1056 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1057 ret <2 x float> %ins1
1060 ; --------------------------------------------------------------------------------
1062 ; --------------------------------------------------------------------------------
1064 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1065 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1066 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1067 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1069 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1070 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1072 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1073 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1074 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1075 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1076 %tid.ext = sext i32 %tid to i64
1077 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1078 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1079 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1080 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1081 %a = load volatile float, float addrspace(1)* %a.gep
1082 %b = load volatile float, float addrspace(1)* %b.gep
1083 %c = load volatile float, float addrspace(1)* %c.gep
1084 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1085 %fneg = fsub float -0.000000e+00, %fma
1086 store float %fneg, float addrspace(1)* %out.gep
1090 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1091 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1092 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1093 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1094 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1095 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1096 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1097 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1098 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1099 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1100 %tid.ext = sext i32 %tid to i64
1101 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1102 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1103 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1104 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1105 %a = load volatile float, float addrspace(1)* %a.gep
1106 %b = load volatile float, float addrspace(1)* %b.gep
1107 %c = load volatile float, float addrspace(1)* %c.gep
1108 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1109 %fneg = fsub float -0.000000e+00, %fma
1110 store volatile float %fneg, float addrspace(1)* %out
1111 store volatile float %fma, float addrspace(1)* %out
1115 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1116 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1117 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1118 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1120 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1121 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1122 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1124 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1125 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1127 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1128 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1129 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1130 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1131 %tid.ext = sext i32 %tid to i64
1132 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1133 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1134 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1135 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1136 %a = load volatile float, float addrspace(1)* %a.gep
1137 %b = load volatile float, float addrspace(1)* %b.gep
1138 %c = load volatile float, float addrspace(1)* %c.gep
1139 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1140 %fneg = fsub float -0.000000e+00, %fma
1141 %use1 = fmul float %fma, 4.0
1142 store volatile float %fneg, float addrspace(1)* %out
1143 store volatile float %use1, float addrspace(1)* %out
1147 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1148 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1149 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1150 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1152 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1153 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1155 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1156 ; GCN-NSZ-NOT: [[FMA]]
1157 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1158 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1159 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1160 %tid.ext = sext i32 %tid to i64
1161 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1162 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1163 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1164 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1165 %a = load volatile float, float addrspace(1)* %a.gep
1166 %b = load volatile float, float addrspace(1)* %b.gep
1167 %c = load volatile float, float addrspace(1)* %c.gep
1168 %fneg.a = fsub float -0.000000e+00, %a
1169 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1170 %fneg = fsub float -0.000000e+00, %fma
1171 store volatile float %fneg, float addrspace(1)* %out
1175 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1176 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1177 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1178 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1180 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1181 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1183 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1184 ; GCN-NSZ-NOT: [[FMA]]
1185 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1186 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1187 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1188 %tid.ext = sext i32 %tid to i64
1189 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1190 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1191 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1192 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1193 %a = load volatile float, float addrspace(1)* %a.gep
1194 %b = load volatile float, float addrspace(1)* %b.gep
1195 %c = load volatile float, float addrspace(1)* %c.gep
1196 %fneg.b = fsub float -0.000000e+00, %b
1197 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1198 %fneg = fsub float -0.000000e+00, %fma
1199 store volatile float %fneg, float addrspace(1)* %out
1203 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1204 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1205 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1206 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1208 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
1209 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1211 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1212 ; GCN-NSZ-NOT: [[FMA]]
1213 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1214 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1215 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1216 %tid.ext = sext i32 %tid to i64
1217 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1218 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1219 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1220 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1221 %a = load volatile float, float addrspace(1)* %a.gep
1222 %b = load volatile float, float addrspace(1)* %b.gep
1223 %c = load volatile float, float addrspace(1)* %c.gep
1224 %fneg.a = fsub float -0.000000e+00, %a
1225 %fneg.b = fsub float -0.000000e+00, %b
1226 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1227 %fneg = fsub float -0.000000e+00, %fma
1228 store volatile float %fneg, float addrspace(1)* %out
1232 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1233 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1234 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1235 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1237 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1238 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1240 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1241 ; GCN-NSZ-NOT: [[FMA]]
1242 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1243 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1244 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1245 %tid.ext = sext i32 %tid to i64
1246 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1247 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1248 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1249 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1250 %a = load volatile float, float addrspace(1)* %a.gep
1251 %b = load volatile float, float addrspace(1)* %b.gep
1252 %c = load volatile float, float addrspace(1)* %c.gep
1253 %fneg.a = fsub float -0.000000e+00, %a
1254 %fneg.c = fsub float -0.000000e+00, %c
1255 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1256 %fneg = fsub float -0.000000e+00, %fma
1257 store volatile float %fneg, float addrspace(1)* %out
1261 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1262 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1263 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1264 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1266 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1267 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1269 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1270 ; GCN-NSZ-NOT: [[FMA]]
1271 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1272 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1273 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1274 %tid.ext = sext i32 %tid to i64
1275 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1276 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1277 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1278 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1279 %a = load volatile float, float addrspace(1)* %a.gep
1280 %b = load volatile float, float addrspace(1)* %b.gep
1281 %c = load volatile float, float addrspace(1)* %c.gep
1282 %fneg.c = fsub float -0.000000e+00, %c
1283 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1284 %fneg = fsub float -0.000000e+00, %fma
1285 store volatile float %fneg, float addrspace(1)* %out
; (review note) fneg(fma(fneg(a), b, c)) where fneg(a) also has a plain store use.
; The nsz run folds the negates into the fma (fma(a, b, -c)) but must still
; materialize -a with an explicit sign-bit xor for the second store.
1289 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1290 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1291 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1292 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1294 ; GCN-SAFE: v_xor_b32
1295 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1296 ; GCN-SAFE: v_xor_b32
1298 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1299 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1301 ; GCN-NSZ-NOT: [[FMA]]
1302 ; GCN-NSZ-NOT: [[NEG_A]]
1303 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1304 ; GCN-NSZ-NOT: [[NEG_A]]
1305 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1306 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1307 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1308 %tid.ext = sext i32 %tid to i64
1309 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1310 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1311 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1312 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1313 %a = load volatile float, float addrspace(1)* %a.gep
1314 %b = load volatile float, float addrspace(1)* %b.gep
1315 %c = load volatile float, float addrspace(1)* %c.gep
; %fneg.a has two uses: fma operand (foldable) and a raw store (not foldable).
1316 %fneg.a = fsub float -0.000000e+00, %a
1317 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1318 %fneg = fsub float -0.000000e+00, %fma
1319 store volatile float %fneg, float addrspace(1)* %out
1320 store volatile float %fneg.a, float addrspace(1)* %out
; (review note) Like the store-use test above, but the second use of fneg(a) is a
; foldable fmul by SGPR %d, so -a can be folded as a source modifier into the
; mul and no standalone xor is needed in either run.
1324 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1325 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1326 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1327 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1329 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1330 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
1331 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1333 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1334 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1335 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1336 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
1337 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1338 %tid.ext = sext i32 %tid to i64
1339 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1340 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1341 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1342 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1343 %a = load volatile float, float addrspace(1)* %a.gep
1344 %b = load volatile float, float addrspace(1)* %b.gep
1345 %c = load volatile float, float addrspace(1)* %c.gep
1346 %fneg.a = fsub float -0.000000e+00, %a
1347 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1348 %fneg = fsub float -0.000000e+00, %fma
; Second (foldable) use of %fneg.a; %d arrives as a kernel SGPR argument.
1349 %use1 = fmul float %fneg.a, %d
1350 store volatile float %fneg, float addrspace(1)* %out
1351 store volatile float %use1, float addrspace(1)* %out
1355 ; --------------------------------------------------------------------------------
1357 ; --------------------------------------------------------------------------------
; (review note) fneg of llvm.fmuladd: safe run keeps v_mac + sign-bit xor; the
; nsz run distributes the negate into a single v_mad with -b and -c modifiers.
1359 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1360 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1361 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1362 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1364 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1365 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1367 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1368 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1369 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1370 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1371 %tid.ext = sext i32 %tid to i64
1372 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1373 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1374 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1375 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1376 %a = load volatile float, float addrspace(1)* %a.gep
1377 %b = load volatile float, float addrspace(1)* %b.gep
1378 %c = load volatile float, float addrspace(1)* %c.gep
; fmuladd (contractable mul+add), unlike the exact llvm.fma tests above.
1379 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1380 %fneg = fsub float -0.000000e+00, %fma
1381 store float %fneg, float addrspace(1)* %out.gep
; (review note) fmuladd whose result is both negated and multiplied by 4.0.
; The nsz run negates the mad itself, then compensates the second use by
; multiplying the negated mad by -4.0 instead of 4.0.
1385 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1386 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1387 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1388 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1390 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1391 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1392 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1394 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
1395 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1397 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1398 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1399 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1400 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1401 %tid.ext = sext i32 %tid to i64
1402 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1403 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1404 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1405 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1406 %a = load volatile float, float addrspace(1)* %a.gep
1407 %b = load volatile float, float addrspace(1)* %b.gep
1408 %c = load volatile float, float addrspace(1)* %c.gep
1409 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1410 %fneg = fsub float -0.000000e+00, %fma
; Second use of the un-negated fmuladd result.
1411 %use1 = fmul float %fma, 4.0
1412 store volatile float %fneg, float addrspace(1)* %out
1413 store volatile float %use1, float addrspace(1)* %out
1417 ; --------------------------------------------------------------------------------
1419 ; --------------------------------------------------------------------------------
; (review note) fneg(fpext f32->f64) folds to a negate source modifier on the
; f32->f64 convert (e64 encoding), avoiding a separate 64-bit negate.
1421 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1422 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1423 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1424 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1425 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1426 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1427 %tid.ext = sext i32 %tid to i64
1428 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1429 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1430 %a = load volatile float, float addrspace(1)* %a.gep
1431 %fpext = fpext float %a to double
1432 %fneg = fsub double -0.000000e+00, %fpext
1433 store double %fneg, double addrspace(1)* %out.gep
; (review note) fneg(fpext(fneg(a))) — the two negates cancel, leaving a plain
; e32 convert with no source modifier.
1437 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1438 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1439 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1440 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1441 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1442 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1443 %tid.ext = sext i32 %tid to i64
1444 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1445 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1446 %a = load volatile float, float addrspace(1)* %a.gep
1447 %fneg.a = fsub float -0.000000e+00, %a
1448 %fpext = fpext float %fneg.a to double
1449 %fneg = fsub double -0.000000e+00, %fpext
1450 store double %fneg, double addrspace(1)* %out.gep
; (review note) Same cancellation as above, but fneg(a) is also stored, so an
; explicit f32 sign-bit xor is still emitted for that extra use.
1454 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1455 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1456 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1457 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1458 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1459 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
1460 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1461 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1462 %tid.ext = sext i32 %tid to i64
1463 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1464 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1465 %a = load volatile float, float addrspace(1)* %a.gep
1466 %fneg.a = fsub float -0.000000e+00, %a
1467 %fpext = fpext float %fneg.a to double
1468 %fneg = fsub double -0.000000e+00, %fpext
1469 store volatile double %fneg, double addrspace(1)* %out.gep
1470 store volatile float %fneg.a, float addrspace(1)* undef
; (review note) fpext has two uses (negated store and plain store): the f64
; negate is done by xoring only the high dword of the convert result.
1474 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1475 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1476 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1477 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1478 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1479 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1480 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1481 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1482 %tid.ext = sext i32 %tid to i64
1483 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1484 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1485 %a = load volatile float, float addrspace(1)* %a.gep
1486 %fpext = fpext float %a to double
1487 %fneg = fsub double -0.000000e+00, %fpext
1488 store volatile double %fneg, double addrspace(1)* %out.gep
1489 store volatile double %fpext, double addrspace(1)* undef
; (review note) Variant where the second use of the fpext is a foldable f64 mul
; by 4.0; the negate is still realized as a high-dword xor for the store.
1493 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1494 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1495 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1496 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1497 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1498 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1499 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1500 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1501 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1502 %tid.ext = sext i32 %tid to i64
1503 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1504 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1505 %a = load volatile float, float addrspace(1)* %a.gep
1506 %fpext = fpext float %a to double
1507 %fneg = fsub double -0.000000e+00, %fpext
1508 %mul = fmul double %fpext, 4.0
1509 store volatile double %fneg, double addrspace(1)* %out.gep
1510 store volatile double %mul, double addrspace(1)* %out.gep
; (review note) f16->f32 sibling of the multi-use fpext test. Only the label is
; checked: per the FIXME, source modifiers are not yet folded for f16->f32, so
; no instruction-level expectations are pinned down here.
1514 ; FIXME: Source modifiers not folded for f16->f32
1515 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1516 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1517 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1518 %tid.ext = sext i32 %tid to i64
1519 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1520 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1521 %a = load volatile half, half addrspace(1)* %a.gep
1522 %fpext = fpext half %a to float
1523 %fneg = fsub float -0.000000e+00, %fpext
1524 store volatile float %fneg, float addrspace(1)* %out.gep
1525 store volatile float %fpext, float addrspace(1)* %out.gep
; (review note) Foldable-second-use variant for f16->f32; label-only check for
; the same reason as the preceding FIXME'd test.
1529 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1530 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1531 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1532 %tid.ext = sext i32 %tid to i64
1533 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1534 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1535 %a = load volatile half, half addrspace(1)* %a.gep
1536 %fpext = fpext half %a to float
1537 %fneg = fsub float -0.000000e+00, %fpext
1538 %mul = fmul float %fpext, 4.0
1539 store volatile float %fneg, float addrspace(1)* %out.gep
1540 store volatile float %mul, float addrspace(1)* %out.gep
1544 ; --------------------------------------------------------------------------------
1546 ; --------------------------------------------------------------------------------
; (review note) fneg(fptrunc f64->f32) folds to a negate source modifier on the
; f64->f32 convert.
1548 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1549 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1550 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1551 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1552 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1553 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1554 %tid.ext = sext i32 %tid to i64
1555 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1556 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1557 %a = load volatile double, double addrspace(1)* %a.gep
1558 %fpround = fptrunc double %a to float
1559 %fneg = fsub float -0.000000e+00, %fpround
1560 store float %fneg, float addrspace(1)* %out.gep
; (review note) fneg(fptrunc(fneg(a))) — the negates cancel, leaving a plain e32
; convert.
1564 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1565 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1566 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1567 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1568 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1569 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1570 %tid.ext = sext i32 %tid to i64
1571 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1572 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1573 %a = load volatile double, double addrspace(1)* %a.gep
1574 %fneg.a = fsub double -0.000000e+00, %a
1575 %fpround = fptrunc double %fneg.a to float
1576 %fneg = fsub float -0.000000e+00, %fpround
1577 store float %fneg, float addrspace(1)* %out.gep
; (review note) Cancelling negates around fptrunc, plus a store use of the f64
; fneg: the stored -a is formed by xoring only the high half of the pair.
1581 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1582 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1583 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1584 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1585 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1586 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1587 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1588 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1589 %tid.ext = sext i32 %tid to i64
1590 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1591 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1592 %a = load volatile double, double addrspace(1)* %a.gep
1593 %fneg.a = fsub double -0.000000e+00, %a
1594 %fpround = fptrunc double %fneg.a to float
1595 %fneg = fsub float -0.000000e+00, %fpround
1596 store volatile float %fneg, float addrspace(1)* %out.gep
1597 store volatile double %fneg.a, double addrspace(1)* undef
; (review note) Second use of the f64 fneg is a foldable mul by SGPR pair %c,
; so -a is consumed as a source modifier there rather than materialized.
1601 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1602 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1603 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1604 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1606 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1607 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1608 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1609 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1610 %tid.ext = sext i32 %tid to i64
1611 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1612 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1613 %a = load volatile double, double addrspace(1)* %a.gep
1614 %fneg.a = fsub double -0.000000e+00, %a
1615 %fpround = fptrunc double %fneg.a to float
1616 %fneg = fsub float -0.000000e+00, %fpround
1617 %use1 = fmul double %fneg.a, %c
1618 store volatile float %fneg, float addrspace(1)* %out.gep
1619 store volatile double %use1, double addrspace(1)* undef
; (review note) fneg(fptrunc f32->f16) folds into a negate modifier on the
; f32->f16 convert; result stored as a short.
1623 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1624 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1625 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1626 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1627 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1628 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1629 %tid.ext = sext i32 %tid to i64
1630 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1631 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1632 %a = load volatile float, float addrspace(1)* %a.gep
1633 %fpround = fptrunc float %a to half
1634 %fneg = fsub half -0.000000e+00, %fpround
1635 store half %fneg, half addrspace(1)* %out.gep
; (review note) f32->f16 round of a negated value, itself negated: the negates
; cancel, leaving an unmodified e32 convert.
1639 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1640 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1641 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1642 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1643 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1644 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1645 %tid.ext = sext i32 %tid to i64
1646 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1647 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1648 %a = load volatile float, float addrspace(1)* %a.gep
1649 %fneg.a = fsub float -0.000000e+00, %a
1650 %fpround = fptrunc float %fneg.a to half
1651 %fneg = fsub half -0.000000e+00, %fpround
1652 store half %fneg, half addrspace(1)* %out.gep
; (review note) fptrunc result has a negated use and a plain use, so the convert
; stays unmodified and the negate becomes a separate f32 sign-bit xor.
1656 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1657 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1658 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1659 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1660 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1661 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
1662 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1663 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1664 %tid.ext = sext i32 %tid to i64
1665 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1666 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1667 %a = load volatile double, double addrspace(1)* %a.gep
1668 %fpround = fptrunc double %a to float
1669 %fneg = fsub float -0.000000e+00, %fpround
1670 store volatile float %fneg, float addrspace(1)* %out.gep
1671 store volatile float %fpround, float addrspace(1)* %out.gep
; (review note) f32->f16 negate-cancellation with an extra store use of the f32
; fneg, which still requires an explicit sign-bit xor.
1675 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1676 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1677 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1678 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1679 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1680 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1681 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1682 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1683 %tid.ext = sext i32 %tid to i64
1684 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1685 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1686 %a = load volatile float, float addrspace(1)* %a.gep
1687 %fneg.a = fsub float -0.000000e+00, %a
1688 %fpround = fptrunc float %fneg.a to half
1689 %fneg = fsub half -0.000000e+00, %fpround
1690 store volatile half %fneg, half addrspace(1)* %out.gep
1691 store volatile float %fneg.a, float addrspace(1)* undef
; (review note) As above but the extra use of fneg(a) is a foldable mul by SGPR
; %c, so -a becomes a source modifier on the mul instead of an xor.
1695 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1697 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1698 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1699 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1700 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1701 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1702 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1703 %tid.ext = sext i32 %tid to i64
1704 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1705 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1706 %a = load volatile float, float addrspace(1)* %a.gep
1707 %fneg.a = fsub float -0.000000e+00, %a
1708 %fpround = fptrunc float %fneg.a to half
1709 %fneg = fsub half -0.000000e+00, %fpround
1710 %use1 = fmul float %fneg.a, %c
1711 store volatile half %fneg, half addrspace(1)* %out.gep
1712 store volatile float %use1, float addrspace(1)* undef
1716 ; --------------------------------------------------------------------------------
1718 ; --------------------------------------------------------------------------------
; (review note) fneg(rcp(a)) folds to a negate source modifier on v_rcp
; (rcp(-a) == -rcp(a) up to sign of zero).
1720 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1721 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1722 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1723 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1724 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1725 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1726 %tid.ext = sext i32 %tid to i64
1727 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1728 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1729 %a = load volatile float, float addrspace(1)* %a.gep
1730 %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1731 %fneg = fsub float -0.000000e+00, %rcp
1732 store float %fneg, float addrspace(1)* %out.gep
; (review note) fneg(rcp(fneg(a))) — negates cancel, leaving a plain e32 rcp.
1736 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1737 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1738 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1739 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1740 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1741 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1742 %tid.ext = sext i32 %tid to i64
1743 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1744 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1745 %a = load volatile float, float addrspace(1)* %a.gep
1746 %fneg.a = fsub float -0.000000e+00, %a
1747 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1748 %fneg = fsub float -0.000000e+00, %rcp
1749 store float %fneg, float addrspace(1)* %out.gep
; (review note) Negate-cancellation around rcp, plus a store use of fneg(a)
; that still forces an explicit sign-bit xor.
1753 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1754 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1755 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1756 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1757 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1758 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1759 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1760 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1761 %tid.ext = sext i32 %tid to i64
1762 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1763 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1764 %a = load volatile float, float addrspace(1)* %a.gep
1765 %fneg.a = fsub float -0.000000e+00, %a
1766 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1767 %fneg = fsub float -0.000000e+00, %rcp
1768 store volatile float %fneg, float addrspace(1)* %out.gep
1769 store volatile float %fneg.a, float addrspace(1)* undef
; (review note) Second use of fneg(a) is a foldable mul by SGPR %c, so -a is
; absorbed as a source modifier there and the rcp stays unmodified.
1773 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1774 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1775 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1776 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1777 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1778 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1779 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1780 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1781 %tid.ext = sext i32 %tid to i64
1782 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1783 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1784 %a = load volatile float, float addrspace(1)* %a.gep
1785 %fneg.a = fsub float -0.000000e+00, %a
1786 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1787 %fneg = fsub float -0.000000e+00, %rcp
1788 %use1 = fmul float %fneg.a, %c
1789 store volatile float %fneg, float addrspace(1)* %out.gep
1790 store volatile float %use1, float addrspace(1)* undef
1794 ; --------------------------------------------------------------------------------
1796 ; --------------------------------------------------------------------------------
; (review note) fneg of amdgcn.fmul.legacy folds into a negate modifier on one
; mul operand. Legacy mul (0 * anything == 0) permits this without nsz.
1798 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1799 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1800 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1801 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1802 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1803 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1804 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1805 %tid.ext = sext i32 %tid to i64
1806 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1807 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1808 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1809 %a = load volatile float, float addrspace(1)* %a.gep
1810 %b = load volatile float, float addrspace(1)* %b.gep
1811 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1812 %fneg = fsub float -0.000000e+00, %mul
1813 store float %fneg, float addrspace(1)* %out.gep
; (review note) Legacy mul result has both a negated and a plain store use, so
; the mul stays unmodified and the negate is a separate sign-bit xor.
1817 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1818 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1819 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1820 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1821 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1822 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1823 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1824 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1825 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1826 %tid.ext = sext i32 %tid to i64
1827 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1828 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1829 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1830 %a = load volatile float, float addrspace(1)* %a.gep
1831 %b = load volatile float, float addrspace(1)* %b.gep
1832 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1833 %fneg = fsub float -0.000000e+00, %mul
1834 store volatile float %fneg, float addrspace(1)* %out
1835 store volatile float %mul, float addrspace(1)* %out
; (review note) Both uses of the legacy mul are foldable: the negate moves onto
; one mul operand and the second legacy mul compensates with a negated input.
1839 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1840 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1841 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1842 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1843 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1844 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1845 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1846 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1847 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1848 %tid.ext = sext i32 %tid to i64
1849 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1850 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1851 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1852 %a = load volatile float, float addrspace(1)* %a.gep
1853 %b = load volatile float, float addrspace(1)* %b.gep
1854 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1855 %fneg = fsub float -0.000000e+00, %mul
1856 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1857 store volatile float %fneg, float addrspace(1)* %out
1858 store volatile float %use1, float addrspace(1)* %out
; (review note) fneg(mul_legacy(fneg(a), b)) — the negates cancel, giving a
; plain e32 legacy mul.
1862 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1863 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1864 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1865 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1866 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1867 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1868 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1869 %tid.ext = sext i32 %tid to i64
1870 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1871 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1872 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1873 %a = load volatile float, float addrspace(1)* %a.gep
1874 %b = load volatile float, float addrspace(1)* %b.gep
1875 %fneg.a = fsub float -0.000000e+00, %a
1876 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1877 %fneg = fsub float -0.000000e+00, %mul
1878 store volatile float %fneg, float addrspace(1)* %out
; (review note) Mirror of the previous test with the inner negate on b; the
; negates again cancel to a plain e32 legacy mul.
1882 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1883 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1884 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1885 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1886 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1887 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1888 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1889 %tid.ext = sext i32 %tid to i64
1890 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1891 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1892 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1893 %a = load volatile float, float addrspace(1)* %a.gep
1894 %b = load volatile float, float addrspace(1)* %b.gep
1895 %fneg.b = fsub float -0.000000e+00, %b
1896 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1897 %fneg = fsub float -0.000000e+00, %mul
1898 store volatile float %fneg, float addrspace(1)* %out
; (review note) Both inputs negated plus an outer negate: three negates reduce
; to a single modifier on one operand of the legacy mul.
1902 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1903 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1904 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1905 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1906 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1907 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1908 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1909 %tid.ext = sext i32 %tid to i64
1910 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1911 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1912 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1913 %a = load volatile float, float addrspace(1)* %a.gep
1914 %b = load volatile float, float addrspace(1)* %b.gep
1915 %fneg.a = fsub float -0.000000e+00, %a
1916 %fneg.b = fsub float -0.000000e+00, %b
1917 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1918 %fneg = fsub float -0.000000e+00, %mul
1919 store volatile float %fneg, float addrspace(1)* %out
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; The fneg of %a has a second (store) use, so an explicit xor is still needed
; to materialize it, while the mul/fneg pair still cancels.
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; The fneg of %a feeds two mul_legacy uses; each can absorb it as a modifier.
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}
; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(sin(a)): the negation folds into the 1/(2*pi) pre-scale multiply
; (0xbe22f983 is -0.15915494, i.e. -1/(2*pi)).
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fsub float -0.000000e+00, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(amdgcn.sin(a)): folds as a source modifier on v_sin.
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fsub float -0.0, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(trunc(a)): folds as a source modifier on v_trunc.
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(round(a)): round expands to trunc/sub/cndmask/add; only the nsz run may
; fold the negation into the final add-as-sub.
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fsub float -0.0, %round
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(rint(a)): folds as a source modifier on v_rndne.
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fsub float -0.0, %rint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(nearbyint(a)): also lowers to v_rndne with a negated source.
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fsub float -0.0, %nearbyint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
2095 ; --------------------------------------------------------------------------------
2096 ; fcanonicalize tests
2097 ; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(canonicalize(a)): canonicalize lowers to a multiply by 1.0, and the
; negation folds into the constant, giving a single mul by -1.0.
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; v_interp_p1 has no source modifiers, so fneg(a*b) is instead folded back
; into the multiply that produces the interpolation input.
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}
; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; Same as the p1 case: the fneg folds into the feeding multiply because
; v_interp_p2 cannot take a negate modifier.
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc0

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]

; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]

; The fneg crosses a basic-block boundary (CopyToReg), so it cannot be folded
; into its user in the other block and is materialized with an xor.
define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %cmp0 = icmp eq i32 %d, 0
  br i1 %cmp0, label %if, label %endif

if:
  %mul1 = fmul float %fneg, %c
  store volatile float %mul1, float addrspace(1)* %out.gep
  br label %endif

endif:
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; inline asm tests
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; The fneg'd value is consumed by inline asm (no modifiers possible), so the
; negation is folded into the feeding fmul instead.
define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %fneg, float addrspace(1)* %out.gep
  ret void
}
; --------------------------------------------------------------------------------
; inline asm use tests
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; %mul has a second (store) use, so the fneg cannot be folded into the source
; either; an explicit xor materializes the negated value for the inline asm.
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}
2265 ; --------------------------------------------------------------------------------
2266 ; code size regression tests
2267 ; --------------------------------------------------------------------------------
; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}
; There are multiple users, but both require using a larger encoding
; for the modifier.
; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %mul0 = fmul float %fneg.a, %b
  %mul1 = fmul float %fneg.a, %c

  store volatile float %mul0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}
; One user is VOP3 so has no cost to folding the modifier, the other does.
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
  %mul1 = fmul float %fneg.a, %c

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}
; The use of the fneg requires a code size increase, but folding into
; the source does not

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]

; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
  %fneg.fma0 = fsub float -0.0, %fma0
  %mul1 = fmul float %fneg.fma0, %c
  %mul2 = fmul float %fneg.fma0, %d

  store volatile float %mul1, float addrspace(1)* %out
  store volatile float %mul2, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]

; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; f64 variant of the preceding test; f64 multiplies are VOP3 so the modifier
; folds into both users for free.
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %b = load volatile double, double addrspace(1)* %b.gep
  %c = load volatile double, double addrspace(1)* %c.gep
  %d = load volatile double, double addrspace(1)* %d.gep

  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
  %fneg.fma0 = fsub double -0.0, %fma0
  %mul1 = fmul double %fneg.fma0, %c
  %mul2 = fmul double %fneg.fma0, %d

  store volatile double %mul1, double addrspace(1)* %out
  store volatile double %mul2, double addrspace(1)* %out
  ret void
}
; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.

; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  store volatile float %fma0, float addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; %trunc.a has an extra non-negated use, so the fneg folds only into the fma
; while the mul keeps the un-negated trunc result.
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  %mul1 = fmul float %trunc.a, %d
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}
; Declarations for the LLVM and AMDGPU intrinsics exercised by the tests above.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare half @llvm.minnum.f16(half, half) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

; #0 = plain nounwind (used for intrinsics with side effects, e.g. vintrp);
; #1 = nounwind readnone (pure math intrinsics).
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }