1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
6 ; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
10 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
11 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
13 ; GFX9-F32DENORM-NEXT: v_mul_f16
14 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
15 ; GFX9-F32DENORM-NEXT: v_add_f32
16 define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; computes fpext(x * y) + z
18 %mul = fmul half %x, %y ; product is formed in half precision
19 %mul.ext = fpext half %mul to float ; widen the product to float
20 %add = fadd float %mul.ext, %z ; extended product is the left-hand addend
24 ; f16->f64 is not free.
25 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
27 ; GFX89: v_cvt_f32_f16
28 ; GFX89: v_cvt_f64_f32
30 define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 { ; computes fpext(x * y) + z with a half -> double extension
32 %mul = fmul half %x, %y ; product is formed in half precision
33 %mul.ext = fpext half %mul to double ; widen directly from half to double
34 %add = fadd double %mul.ext, %z ; add performed in double
38 ; f32->f64 is not free.
39 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
43 define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 { ; computes fpext(x * y) + z with a float -> double extension
45 %mul = fmul float %x, %y ; product is formed in single precision
46 %mul.ext = fpext float %mul to double ; widen the product to double
47 %add = fadd double %mul.ext, %z ; add performed in double
51 ; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
52 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
54 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
55 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
57 ; GFX9-F32DENORM-NEXT: v_mul_f16
58 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
59 ; GFX9-F32DENORM-NEXT: v_add_f32
60 ; GFX9-F32DENORM-NEXT: s_setpc_b64
61 define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 { ; computes z + fpext(x * y), operands of the add swapped
63 %mul = fmul half %x, %y ; product is formed in half precision
64 %mul.ext = fpext half %mul to float ; widen the product to float
65 %add = fadd float %z, %mul.ext ; extended product is the right-hand addend here
69 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
70 ; -> (fma x, y, (fma (fpext u), (fpext v), z))
72 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
74 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
75 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
76 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
77 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
79 ; GFX9-F32DENORM-NEXT: v_mul_f16
80 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
81 ; GFX9-F32DENORM-NEXT: v_fma_f32
82 ; GFX9-F32DENORM-NEXT: v_add_f32
83 ; GFX9-F32DENORM-NEXT: s_setpc_b64
84 define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { ; computes fmuladd(x, y, fpext(u * v)) + z
86 %mul = fmul half %u, %v ; half-precision product of u and v
87 %mul.ext = fpext half %mul to float ; widen the product to float
88 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) ; extended product is the addend of the fmuladd
89 %add = fadd float %fma, %z ; fmuladd result is the left-hand addend
93 ; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
94 ; -> (fma y, z, (fma (fpext u), (fpext v), x))
95 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
97 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
98 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
99 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
100 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
102 ; GFX9-F32DENORM-NEXT: v_mul_f16
103 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
104 ; GFX9-F32DENORM-NEXT: v_fma_f32
105 ; GFX9-F32DENORM-NEXT: v_add_f32
106 ; GFX9-F32DENORM-NEXT: s_setpc_b64
107 define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 { ; computes z + fmuladd(x, y, fpext(u * v))
109 %mul = fmul half %u, %v ; half-precision product of u and v
110 %mul.ext = fpext half %mul to float ; widen the product to float
111 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) ; extended product is the addend of the fmuladd
112 %add = fadd float %z, %fma ; fmuladd result is the right-hand addend here
116 ; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
118 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
119 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
120 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
121 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
123 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
124 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
125 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
126 define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { ; computes ((x * y) + fpext(u * v)) + z using a contract-flagged fmul/fadd pair
128 %mul = fmul half %u, %v ; half-precision product of u and v (no fast-math flags)
129 %mul.ext = fpext half %mul to float ; widen the product to float
130 %mul1 = fmul contract float %x, %y ; float product carrying the contract flag
131 %fmad = fadd contract float %mul1, %mul.ext ; contract-flagged add of the two products
132 %add = fadd float %fmad, %z ; final add carries no fast-math flags
136 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
137 ; -> (fma x, y, (fma (fpext u), (fpext v), z))
139 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
142 ; GFX89: v_cvt_f32_f16
145 define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { ; computes fma(x, y, fpext(u * v)) + z
147 %mul = fmul contract half %u, %v ; half-precision product carrying the contract flag
148 %mul.ext = fpext half %mul to float ; widen the product to float
149 %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext) ; uses llvm.fma rather than llvm.fmuladd
150 %add = fadd float %fma, %z ; fma result is the left-hand addend
154 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
157 ; GFX89: v_cvt_f32_f16
160 define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 { ; computes z + fma(x, y, fpext(u * v))
162 %mul = fmul contract half %u, %v ; half-precision product carrying the contract flag
163 %mul.ext = fpext half %mul to float ; widen the product to float
164 %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext) ; uses llvm.fma rather than llvm.fmuladd
165 %add = fadd float %z, %fma ; fma result is the right-hand addend here
169 ; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
170 ; -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
172 ; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
175 ; GFX9: v_cvt_f32_f16
176 ; GFX9: v_add_f32_e32
177 define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { ; computes x + fpext(fmuladd(y, z, u * v))
179 %mul = fmul contract half %u, %v ; half-precision product carrying the contract flag
180 %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul) ; fmuladd evaluated entirely in half
181 %ext.fma = fpext half %fma to float ; the extension is applied to the fmuladd result
182 %add = fadd float %x, %ext.fma ; extended value is the right-hand addend
186 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
189 ; GFX9: v_cvt_f32_f16
190 ; GFX9: v_add_f32_e32
191 define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { ; computes x + fpext(fma(y, z, u * v))
193 %mul = fmul contract half %u, %v ; half-precision product carrying the contract flag
194 %fma = call half @llvm.fma.f16(half %y, half %z, half %mul) ; fma evaluated entirely in half
195 %ext.fma = fpext half %fma to float ; the extension is applied to the fma result
196 %add = fadd float %x, %ext.fma ; extended value is the right-hand addend
200 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
203 ; GFX9: v_cvt_f32_f16
204 ; GFX9: v_add_f32_e32
205 define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 { ; computes fpext(fma(y, z, u * v)) + x
207 %mul = fmul contract half %u, %v ; half-precision product carrying the contract flag
208 %fma = call half @llvm.fma.f16(half %y, half %z, half %mul) ; fma evaluated entirely in half
209 %ext.fma = fpext half %fma to float ; the extension is applied to the fma result
210 %add = fadd float %ext.fma, %x ; extended value is the left-hand addend here
214 ; fold (fsub (fpext (fmul x, y)), z)
215 ; -> (fma (fpext x), (fpext y), (fneg z))
217 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
219 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
220 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
222 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
223 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
224 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
225 ; GFX9-F32DENORM-NEXT: s_setpc_b64
226 define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; computes fpext(x * y) - z
228 %mul = fmul half %x, %y ; product is formed in half precision
229 %mul.ext = fpext half %mul to float ; widen the product to float
230 %add = fsub float %mul.ext, %z ; note: the value is named %add but this is a subtraction
234 ; fold (fsub x, (fpext (fmul y, z)))
235 ; -> (fma (fneg (fpext y)), (fpext z), x)
237 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
239 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
240 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
242 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32
243 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
244 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32
245 ; GFX9-F32DENORM-NEXT: s_setpc_b64
246 define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 { ; computes x - fpext(y * z)
248 %mul = fmul contract half %y, %z ; half-precision product carrying the contract flag
249 %mul.ext = fpext half %mul to float ; widen the product to float
250 %add = fsub contract float %x, %mul.ext ; contract-flagged subtraction; extended product is the subtrahend
254 ; fold (fsub (fpext (fneg (fmul x, y))), z)
255 ; -> (fneg (fma (fpext x), (fpext y), z))
257 ; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
259 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
260 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
262 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
263 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
264 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
265 ; GFX9-F32DENORM-NEXT: s_setpc_b64
266 define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; computes fpext(-(x * y)) - z, negating before the extension
268 %mul = fmul half %x, %y ; product is formed in half precision
269 %neg.mul = fsub half -0.0, %mul ; negation written as (-0.0 - mul), still in half
270 %neg.mul.ext = fpext half %neg.mul to float ; widen the negated product to float
271 %add = fsub float %neg.mul.ext, %z ; note: the value is named %add but this is a subtraction
275 ; fold (fsub (fneg (fpext (fmul x, y))), z)
276 ; -> (fneg (fma (fpext x), (fpext y), z))
278 ; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
280 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
281 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
283 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
284 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
285 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
286 ; GFX9-F32DENORM-NEXT: s_setpc_b64
287 define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; computes -(fpext(x * y)) - z, negating after the extension
289 %mul = fmul half %x, %y ; product is formed in half precision
290 %mul.ext = fpext half %mul to float ; widen the product to float
291 %neg.mul.ext = fsub float -0.0, %mul.ext ; negation written as (-0.0 - value), done in float
292 %add = fsub float %neg.mul.ext, %z ; note: the value is named %add but this is a subtraction
296 ; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
297 ; -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
298 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
300 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
301 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
302 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
303 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
305 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
306 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
307 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
308 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
309 ; GFX9-F32DENORM-NEXT: s_setpc_b64
310 define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 { ; computes fmuladd(x, y, fpext(u * v)) - z
312 %mul = fmul half %u, %v ; half-precision product of u and v
313 %mul.ext = fpext half %mul to float ; widen the product to float
314 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) ; extended product is the addend of the fmuladd
315 %add = fsub float %fma, %z ; fmuladd result is the minuend
319 ; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
320 ; -> (fmad (fpext x), (fpext y),
321 ; (fmad (fpext u), (fpext v), (fneg z)))
323 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
326 ; GFX9: v_cvt_f32_f16
329 define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 { ; computes fpext(fmuladd(x, y, u * v)) - z
331 %mul = fmul half %u, %v ; half-precision product of u and v
332 %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul) ; fmuladd evaluated entirely in half
333 %fma.ext = fpext half %fma to float ; the extension is applied to the fmuladd result
334 %add = fsub float %fma.ext, %z ; extended value is the minuend
338 ; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
339 ; -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
340 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
342 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
343 ; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
344 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
346 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
347 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
348 ; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
349 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
350 ; GFX9-F32DENORM-NEXT: s_setpc_b64
351 define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 { ; computes x - fmuladd(y, z, fpext(u * v))
353 %mul = fmul half %u, %v ; half-precision product of u and v
354 %mul.ext = fpext half %mul to float ; widen the product to float
355 %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext) ; multiplicands are y and z in this variant
356 %add = fsub float %x, %fma ; fmuladd result is the subtrahend
360 ; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
361 ; -> (fma (fneg (fpext y)), (fpext z),
362 ; (fma (fneg (fpext u)), (fpext v), x))
363 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
365 ; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
366 ; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
367 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
368 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
369 ; GFX9-NEXT: s_setpc_b64
370 define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 { ; computes x - fpext(fmuladd(y, z, u * v))
372 %mul = fmul half %u, %v ; half-precision product of u and v
373 %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul) ; fmuladd evaluated entirely in half
374 %fma.ext = fpext half %fma to float ; the extension is applied to the fmuladd result
375 %add = fsub float %x, %fma.ext ; extended value is the subtrahend
379 declare float @llvm.fmuladd.f32(float, float, float) #0 ; float fmuladd intrinsic called by the tests above
380 declare float @llvm.fma.f32(float, float, float) #0 ; float fma intrinsic called by the tests above
381 declare half @llvm.fmuladd.f16(half, half, half) #0 ; half fmuladd intrinsic called by the tests above
382 declare half @llvm.fma.f16(half, half, half) #0 ; half fma intrinsic called by the tests above
384 attributes #0 = { nounwind readnone speculatable } ; attribute group shared by every function and declaration in this file