1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
6 ; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
10 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
11 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
13 ; GFX9-F32DENORM-NEXT: v_mul_f16
14 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
15 ; GFX9-F32DENORM-NEXT: v_add_f32
; Half-precision product of %x and %y, widened to float, then used as the
; left operand of a float fadd with %z. No fast-math flags on any operation.
16 define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
18 %mul = fmul half %x, %y
19 %mul.ext = fpext half %mul to float
20 %add = fadd float %mul.ext, %z
24 ; f16->f64 is not free.
25 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
27 ; GFX89: v_cvt_f32_f16
28 ; GFX89: v_cvt_f64_f32
; Same multiply/extend/add shape as the f16-to-f32 case, but the half product
; is widened directly to double and added to a double %z.
30 define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
32 %mul = fmul half %x, %y
33 %mul.ext = fpext half %mul to double
34 %add = fadd double %mul.ext, %z
38 ; f32->f64 is not free.
39 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
; Float product of %x and %y, widened to double, then added to a double %z.
43 define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
45 %mul = fmul float %x, %y
46 %mul.ext = fpext float %mul to double
47 %add = fadd double %mul.ext, %z
51 ; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
52 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
54 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
55 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
57 ; GFX9-F32DENORM-NEXT: v_mul_f16
58 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
59 ; GFX9-F32DENORM-NEXT: v_add_f32
60 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted form: the extended half product is the right operand of the fadd
; and %z is the left operand.
61 define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
63 %mul = fmul half %x, %y
64 %mul.ext = fpext half %mul to float
65 %add = fadd float %z, %mul.ext
69 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
70 ; -> (fma x, y, (fma (fpext u), (fpext v), z))
72 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
74 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
75 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
76 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
77 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
79 ; GFX9-F32DENORM-NEXT: v_mul_f16
80 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
81 ; GFX9-F32DENORM-NEXT: v_fma_f32
82 ; GFX9-F32DENORM-NEXT: v_add_f32
83 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; The extended half product of %u and %v is the addend of a float
; llvm.fmuladd over %x and %y; the fmuladd result is then the left operand
; of a float fadd with %z.
84 define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
86 %mul = fmul half %u, %v
87 %mul.ext = fpext half %mul to float
88 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
89 %add = fadd float %fma, %z
93 ; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
94 ; -> (fma y, z, (fma (fpext u), (fpext v), x))
95 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
97 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
98 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
99 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
100 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
102 ; GFX9-F32DENORM-NEXT: v_mul_f16
103 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
104 ; GFX9-F32DENORM-NEXT: v_fma_f32
105 ; GFX9-F32DENORM-NEXT: v_add_f32
106 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted form of the fmuladd case: the llvm.fmuladd result is the right
; operand of the final fadd and %z is the left operand.
107 define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
109 %mul = fmul half %u, %v
110 %mul.ext = fpext half %mul to float
111 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
112 %add = fadd float %z, %fma
116 ; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
118 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
119 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
120 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
121 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
123 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
124 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
125 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
; Here the multiply-add over %x and %y is spelled as a separate fmul and fadd,
; both carrying the contract flag, rather than as an intrinsic call. The half
; multiply of %u and %v and the final fadd with %z carry no fast-math flags.
126 define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
128 %mul = fmul half %u, %v
129 %mul.ext = fpext half %mul to float
130 %mul1 = fmul contract float %x, %y
131 %fmad = fadd contract float %mul1, %mul.ext
132 %add = fadd float %fmad, %z
136 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
137 ; -> (fma x, y, (fma (fpext u), (fpext v), z))
139 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
141 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
142 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
143 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
144 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
146 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
147 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
148 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
149 ; GFX9-F32DENORM-NEXT: v_add_f32_e32 v0, v0, v4
150 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Uses llvm.fma (not llvm.fmuladd) over %x and %y with the extended half
; product as addend. Only the half fmul carries the contract flag; the final
; fadd with %z has the fma result as its left operand.
151 define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
153 %mul = fmul contract half %u, %v
154 %mul.ext = fpext half %mul to float
155 %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
156 %add = fadd float %fma, %z
160 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
162 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
163 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
164 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
165 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
167 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
168 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
169 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
170 ; GFX9-F32DENORM-NEXT: v_add_f32_e32 v0, v4, v0
171 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted form of the llvm.fma case: %z is the left operand of the final
; fadd and the fma result is the right operand.
172 define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
174 %mul = fmul contract half %u, %v
175 %mul.ext = fpext half %mul to float
176 %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
177 %add = fadd float %z, %fma
181 ; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
182 ; -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
184 ; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
186 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
187 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
188 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
190 ; GFX9-F32DENORM-NEXT: v_mul_f16
191 ; GFX9-F32DENORM-NEXT: v_fma_f16
192 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
193 ; GFX9-F32DENORM-NEXT: v_add_f32
194 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; The whole multiply-add chain is in half precision: a contract-flagged fmul
; of %u and %v feeds the addend of a half llvm.fmuladd over %y and %z. Only
; the fmuladd result is widened to float, as the right operand of an fadd
; with %x.
195 define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
197 %mul = fmul contract half %u, %v
198 %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
199 %ext.fma = fpext half %fma to float
200 %add = fadd float %x, %ext.fma
204 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
206 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
207 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
208 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
210 ; GFX9-F32DENORM-NEXT: v_mul_f16
211 ; GFX9-F32DENORM-NEXT: v_fma_f16
212 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
213 ; GFX9-F32DENORM-NEXT: v_add_f32
214 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Same shape as the half fmuladd case, but using the half llvm.fma intrinsic.
; The extended fma result is the right operand of the fadd with %x.
215 define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
217 %mul = fmul contract half %u, %v
218 %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
219 %ext.fma = fpext half %fma to float
220 %add = fadd float %x, %ext.fma
224 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
226 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
227 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
228 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
230 ; GFX9-F32DENORM-NEXT: v_mul_f16
231 ; GFX9-F32DENORM-NEXT: v_fma_f16
232 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
233 ; GFX9-F32DENORM-NEXT: v_add_f32_e32
234 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted form of the half llvm.fma case: the extended fma result is the
; left operand of the fadd and %x is the right operand.
235 define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
237 %mul = fmul contract half %u, %v
238 %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
239 %ext.fma = fpext half %fma to float
240 %add = fadd float %ext.fma, %x
244 ; fold (fsub (fpext (fmul x, y)), z)
245 ; -> (fma (fpext x), (fpext y), (fneg z))
247 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
249 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
250 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
252 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
253 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
254 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
255 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Half product of %x and %y, widened to float, with %z subtracted from it.
; Note that the value named %add is produced by an fsub.
256 define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
258 %mul = fmul half %x, %y
259 %mul.ext = fpext half %mul to float
260 %add = fsub float %mul.ext, %z
264 ; fold (fsub x, (fpext (fmul y, z)))
265 ; -> (fma (fneg (fpext y)), (fpext z), x)
267 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
269 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
270 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
272 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32
273 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
274 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32
275 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted subtraction: the extended half product of %y and %z is subtracted
; from %x. Both the fmul and the fsub carry the contract flag. The value named
; %add is produced by an fsub.
276 define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
278 %mul = fmul contract half %y, %z
279 %mul.ext = fpext half %mul to float
280 %add = fsub contract float %x, %mul.ext
284 ; fold (fsub (fpext (fneg (fmul x, y))), z)
285 ; -> (fneg (fma (fpext x), (fpext y), z))
287 ; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
289 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
290 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
292 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
293 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
294 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
295 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; The half product is negated before the extension. The negation is written
; as a subtraction from -0.0 rather than with the fneg instruction. %z is then
; subtracted from the extended, negated product.
296 define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
298 %mul = fmul half %x, %y
299 %neg.mul = fsub half -0.0, %mul
300 %neg.mul.ext = fpext half %neg.mul to float
301 %add = fsub float %neg.mul.ext, %z
305 ; fold (fsub (fneg (fpext (fmul x, y))), z)
306 ; -> (fneg (fma (fpext x), (fpext y), z))
308 ; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
310 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
311 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
313 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
314 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
315 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
316 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; The half product is extended first and negated afterwards, in float, using
; the fneg instruction. %z is then subtracted from the negated value.
317 define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
319 %mul = fmul half %x, %y
320 %mul.ext = fpext half %mul to float
321 %neg.mul.ext = fneg float %mul.ext
322 %add = fsub float %neg.mul.ext, %z
326 ; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
327 ; -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
328 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
330 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
331 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
332 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
333 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
335 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
336 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
337 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
338 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
339 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; The extended half product of %u and %v is the addend of a float
; llvm.fmuladd over %x and %y, and %z is subtracted from the fmuladd result.
; The half fmul and the final fsub carry the reassoc flag.
340 define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
342 %mul = fmul reassoc half %u, %v
343 %mul.ext = fpext half %mul to float
344 %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
345 %add = fsub reassoc float %fma, %z
349 ; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
350 ; -> (fmad (fpext x), (fpext y),
351 ; (fmad (fpext u), (fpext v), (fneg z)))
353 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
356 ; GFX9: v_cvt_f32_f16
; The multiply and the llvm.fmuladd are both in half precision; only the
; fmuladd result is widened to float before %z is subtracted from it.
; No fast-math flags are present on any operation.
359 define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
361 %mul = fmul half %u, %v
362 %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul)
363 %fma.ext = fpext half %fma to float
364 %add = fsub float %fma.ext, %z
368 ; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
369 ; -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
370 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
372 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
373 ; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
374 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
376 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
377 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
378 ; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
379 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
380 ; GFX9-F32DENORM-NEXT: s_setpc_b64
; Commuted subtraction: the float llvm.fmuladd over %y and %z (with the
; extended half product of %u and %v as addend) is subtracted from %x.
; The half fmul and the fsub carry the reassoc flag.
381 define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
383 %mul = fmul reassoc half %u, %v
384 %mul.ext = fpext half %mul to float
385 %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
386 %add = fsub reassoc float %x, %fma
390 ; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
391 ; -> (fma (fneg (fpext y)), (fpext z),
392 ; (fma (fneg (fpext u)), (fpext v), x))
393 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
395 ; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
396 ; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
397 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
398 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
399 ; GFX9-NEXT: s_setpc_b64
; Commuted subtraction with the multiply-add chain in half precision: the
; half llvm.fmuladd result is widened to float and subtracted from %x.
; No fast-math flags are present on any operation.
400 define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
402 %mul = fmul half %u, %v
403 %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
404 %fma.ext = fpext half %fma to float
405 %add = fsub float %x, %fma.ext
; Declarations of the float and half fmuladd/fma intrinsics called by the
; test functions in this file.
409 declare float @llvm.fmuladd.f32(float, float, float) #0
410 declare float @llvm.fma.f32(float, float, float) #0
411 declare half @llvm.fmuladd.f16(half, half, half) #0
412 declare half @llvm.fma.f16(half, half, half) #0
; Attribute group #0 is referenced by the declarations above and by the
; define lines of the test functions.
414 attributes #0 = { nounwind readnone speculatable }