1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMA %s
3 ; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
5 ; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMA %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMAGFX10 %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMAGFX11 %s
10 ; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
11 ; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
12 ; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
13 ; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
14 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
15 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMADGFX10 %s
16 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAGFX11 %s
18 ; Check for incorrect fmad formation when distributing an fmul over an fadd/fsub with a constant 1.0 operand.
; Scalar f32 case: multiplies %arg0 by (%arg1 + 1.0), with the `fast` flag on
; both the fadd and the fmul. Depending on the check prefix, the expected code
; is either a single multiply-add instruction (v_fma_f32, v_fmac_f32 or
; v_mac_f32) or, under the NOFUSE prefix, a separate v_add_f32 followed by a
; v_mul_f32.
20 define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
21 ; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
23 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
25 ; FMA-NEXT: s_setpc_b64 s[30:31]
27 ; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
29 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1
31 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
32 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
34 ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
36 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
38 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
40 ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
42 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; FMAGFX11-NEXT: v_fmac_f32_e32 v0, v1, v0
44 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
46 ; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
48 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0
50 ; FMAD-NEXT: s_setpc_b64 s[30:31]
52 ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
54 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
56 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
57 %add = fadd fast float %arg1, 1.0
58 %tmp1 = fmul fast float %arg0, %add
; Scalar f32 case with a subtraction: multiplies %arg0 by (1.0 - %arg1), with
; the `fast` flag on both the fsub and the fmul. The fused forms checked below
; (v_fma_f32 or v_mad_f32) take the %arg1 register negated (-v1); the NOFUSE
; prefix instead expects a separate v_sub_f32 followed by a v_mul_f32.
62 define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
63 ; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
65 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
67 ; FMA-NEXT: s_setpc_b64 s[30:31]
69 ; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
71 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1
73 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
74 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
76 ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
78 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; FMAGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
80 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
82 ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
84 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; FMAGFX11-NEXT: v_fma_f32 v0, -v1, v0, v0
86 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
88 ; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
90 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0
92 ; FMAD-NEXT: s_setpc_b64 s[30:31]
94 ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
96 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; FMADGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
98 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
99 %add = fsub fast float 1.0, %arg1
100 %tmp1 = fmul fast float %arg0, %add
; <2 x float> variant of the fadd case: multiplies %arg0 element-wise by
; (%arg1 + <1.0, 1.0>), with the `fast` flag on both operations. The fused
; prefixes expect one multiply-add instruction per element (the FMAGFX11
; prefix expects both halves written as a single v_dual_fmac_f32 pair); the
; NOFUSE prefix expects two v_add_f32 followed by two v_mul_f32.
104 define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
105 ; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
107 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
109 ; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
110 ; FMA-NEXT: s_setpc_b64 s[30:31]
112 ; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
114 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3
116 ; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2
117 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
118 ; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
119 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
121 ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
123 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
125 ; FMAGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
126 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
128 ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
130 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; FMAGFX11-NEXT: v_dual_fmac_f32 v0, v2, v0 :: v_dual_fmac_f32 v1, v3, v1
132 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
134 ; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
136 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0
138 ; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1
139 ; FMAD-NEXT: s_setpc_b64 s[30:31]
141 ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
142 ; FMADGFX10: ; %bb.0:
143 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
145 ; FMADGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
146 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
147 %add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0>
148 %tmp1 = fmul fast <2 x float> %arg0, %add
149 ret <2 x float> %tmp1
; <2 x float> variant of the fsub case: multiplies %arg0 element-wise by
; (<1.0, 1.0> - %arg1), with the `fast` flag on both operations. The fused
; prefixes expect one v_fma_f32 or v_mad_f32 per element, each taking the
; %arg1 register negated (-v2 / -v3); the NOFUSE prefix expects two v_sub_f32
; followed by two v_mul_f32.
152 define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
153 ; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
155 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
157 ; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
158 ; FMA-NEXT: s_setpc_b64 s[30:31]
160 ; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
162 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163 ; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3
164 ; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2
165 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
166 ; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
167 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
169 ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
171 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; FMAGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
173 ; FMAGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
174 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
176 ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
178 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; FMAGFX11-NEXT: v_fma_f32 v0, -v2, v0, v0
180 ; FMAGFX11-NEXT: v_fma_f32 v1, -v3, v1, v1
181 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
183 ; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
185 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0
187 ; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1
188 ; FMAD-NEXT: s_setpc_b64 s[30:31]
190 ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
191 ; FMADGFX10: ; %bb.0:
192 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; FMADGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
194 ; FMADGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
195 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
196 %add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1
197 %tmp1 = fmul fast <2 x float> %arg0, %add
198 ret <2 x float> %tmp1
; Mixed scalar/vector fadd case: the scalar sum (%arg0 + 1.0) is inserted into
; element 0 of an otherwise-undef <2 x float>, and that vector is multiplied by
; %arg1. Only one arithmetic result is checked per prefix: a single v_fma_f32
; or v_mad_f32 on v0/v1, or a v_add_f32 plus v_mul_f32 under the NOFUSE prefix.
; NOTE(review): going by the function name, this presumably targets the combine
; running after legalization - confirm against the DAG combiner.
201 define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
202 ; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
204 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
206 ; FMA-NEXT: s_setpc_b64 s[30:31]
208 ; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
210 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0
212 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
213 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
215 ; FMAGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
217 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; FMAGFX10-NEXT: v_fma_f32 v0, v0, v1, v1
219 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
221 ; FMAGFX11-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
223 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; FMAGFX11-NEXT: v_fma_f32 v0, v0, v1, v1
225 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
227 ; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
229 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1
231 ; FMAD-NEXT: s_setpc_b64 s[30:31]
233 ; FMADGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
234 ; FMADGFX10: ; %bb.0:
235 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; FMADGFX10-NEXT: v_mad_f32 v0, v0, v1, v1
237 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
238 %add = fadd fast float %arg0, 1.0
239 %splat = insertelement <2 x float> undef, float %add, i32 0
240 %tmp1 = fmul fast <2 x float> %arg1, %splat
241 ret <2 x float> %tmp1
; Mixed scalar/vector fsub case: the scalar difference (1.0 - %arg0) is
; inserted into element 0 of an otherwise-undef <2 x float>, and that vector is
; multiplied by %arg1. Only one arithmetic result is checked per prefix: a
; single v_fma_f32 or v_mad_f32 taking -v0, or a v_sub_f32 plus v_mul_f32 under
; the NOFUSE prefix.
; NOTE(review): "ditribute" in the function name looks like a typo for
; "distribute"; it is left as-is because every LABEL check below matches on the
; exact name.
244 define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
245 ; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
247 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
249 ; FMA-NEXT: s_setpc_b64 s[30:31]
251 ; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
253 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0
255 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
256 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
258 ; FMAGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
260 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; FMAGFX10-NEXT: v_fma_f32 v0, -v0, v1, v1
262 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
264 ; FMAGFX11-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
266 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; FMAGFX11-NEXT: v_fma_f32 v0, -v0, v1, v1
268 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
270 ; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
272 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1
274 ; FMAD-NEXT: s_setpc_b64 s[30:31]
276 ; FMADGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
277 ; FMADGFX10: ; %bb.0:
278 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; FMADGFX10-NEXT: v_mad_f32 v0, -v0, v1, v1
280 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
281 %sub = fsub fast float 1.0, %arg0
282 %splat = insertelement <2 x float> undef, float %sub, i32 0
283 %tmp1 = fmul fast <2 x float> %arg1, %splat
284 ret <2 x float> %tmp1
; Attribute group #0, referenced by every define visible in this file: sets the
; "no-infs-fp-math" and "unsafe-fp-math" string attributes to "true".
287 attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }