1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMA %s
3 ; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
5 ; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=NOFUSE %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMA %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMAGFX10 %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefix=FMAGFX11 %s
10 ; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
11 ; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
12 ; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
13 ; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
14 ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAD %s
15 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMADGFX10 %s
16 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefix=FMAGFX11 %s
18 ; Check for incorrect fmad formation when distributing an fmul over an fadd/fsub with a constant 1.0 operand, e.g. x * (y + 1.0) -> y * x + x.
; Scalar f32 case: %arg0 * (%arg1 + 1.0), with the `fast` flag on both the
; fadd and the fmul. Per the assertions below, the FMA, FMAD and GFX10/GFX11
; prefixes expect one fused instruction computing %arg1 * %arg0 + %arg0
; (v_fma_f32, v_mac_f32 or v_fmac_f32), while the NOFUSE prefix expects the
; v_add_f32 and v_mul_f32 to remain separate.
20 define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
21 ; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
23 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
25 ; FMA-NEXT: s_setpc_b64 s[30:31]
27 ; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
29 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1
31 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
32 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
34 ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
36 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
38 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
39 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
41 ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
43 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
45 ; FMAGFX11-NEXT: v_fmac_f32_e32 v0, v1, v0
46 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
48 ; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
50 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0
52 ; FMAD-NEXT: s_setpc_b64 s[30:31]
54 ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
56 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
58 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
59 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
60 %add = fadd fast float %arg1, 1.0
61 %tmp1 = fmul fast float %arg0, %add
; Scalar f32 case: %arg0 * (1.0 - %arg1), with the `fast` flag on both the
; fsub and the fmul. Per the assertions below, the fused prefixes expect a
; single v_fma_f32 / v_mad_f32 with a negated %arg1 operand, i.e.
; -%arg1 * %arg0 + %arg0, while the NOFUSE prefix expects a separate
; v_sub_f32 followed by v_mul_f32.
65 define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
66 ; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
68 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
70 ; FMA-NEXT: s_setpc_b64 s[30:31]
72 ; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
74 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1
76 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
77 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
79 ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
81 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
83 ; FMAGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
84 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
86 ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
88 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
90 ; FMAGFX11-NEXT: v_fma_f32 v0, -v1, v0, v0
91 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
93 ; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
95 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0
97 ; FMAD-NEXT: s_setpc_b64 s[30:31]
99 ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
100 ; FMADGFX10: ; %bb.0:
101 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
103 ; FMADGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
104 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
; Despite its name, %add holds the result of an fsub (1.0 - %arg1).
105 %add = fsub fast float 1.0, %arg1
106 %tmp1 = fmul fast float %arg0, %add
; <2 x float> case: %arg0 * (%arg1 + <1.0, 1.0>), with the `fast` flag on both
; operations. The assertions below expect the same per-lane result as the
; scalar fadd test, emitted once for each of the two lanes (v0 and v1); the
; FMAGFX11 prefix expects both lanes in a single v_dual_fmac_f32.
110 define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
111 ; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
113 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
115 ; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
116 ; FMA-NEXT: s_setpc_b64 s[30:31]
118 ; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
120 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3
122 ; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2
123 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
124 ; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
125 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
127 ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
129 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
131 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
132 ; FMAGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
133 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
135 ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
137 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
139 ; FMAGFX11-NEXT: v_dual_fmac_f32 v0, v2, v0 :: v_dual_fmac_f32 v1, v3, v1
140 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
142 ; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
144 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0
146 ; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1
147 ; FMAD-NEXT: s_setpc_b64 s[30:31]
149 ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
150 ; FMADGFX10: ; %bb.0:
151 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
153 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
154 ; FMADGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
155 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
156 %add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0>
157 %tmp1 = fmul fast <2 x float> %arg0, %add
158 ret <2 x float> %tmp1
; <2 x float> case: %arg0 * (<1.0, 1.0> - %arg1), with the `fast` flag on both
; operations. The assertions below expect the same per-lane result as the
; scalar fsub test, emitted once for each of the two lanes: the fused prefixes
; use a negated %arg1 lane (-v2 / -v3), and the NOFUSE prefix keeps separate
; v_sub_f32 and v_mul_f32 instructions.
161 define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
162 ; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
164 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
166 ; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
167 ; FMA-NEXT: s_setpc_b64 s[30:31]
169 ; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
171 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3
173 ; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2
174 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
175 ; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
176 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
178 ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
180 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
182 ; FMAGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
183 ; FMAGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
184 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
186 ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
188 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
190 ; FMAGFX11-NEXT: v_fma_f32 v0, -v2, v0, v0
191 ; FMAGFX11-NEXT: v_fma_f32 v1, -v3, v1, v1
192 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
194 ; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
196 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0
198 ; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1
199 ; FMAD-NEXT: s_setpc_b64 s[30:31]
201 ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
202 ; FMADGFX10: ; %bb.0:
203 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
205 ; FMADGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
206 ; FMADGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
207 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
; Despite its name, %add holds the result of an fsub (<1.0, 1.0> - %arg1).
208 %add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1
209 %tmp1 = fmul fast <2 x float> %arg0, %add
210 ret <2 x float> %tmp1
; Mixed scalar/vector case: the scalar (%arg0 + 1.0) is inserted into lane 0
; of an otherwise-undef <2 x float>, which is then multiplied by %arg1. The
; assertions below expect a single instruction writing v0 only. Unlike the
; other fadd tests in this file, the FMADGFX10 prefix expects v_mad_f32 here.
; NOTE(review): "post_legalize" in the name presumably means the combine is
; expected to fire after the vector fmul has been legalized - confirm.
213 define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
214 ; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
216 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
218 ; FMA-NEXT: s_setpc_b64 s[30:31]
220 ; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
222 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0
224 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
225 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
227 ; FMAGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
229 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
231 ; FMAGFX10-NEXT: v_fma_f32 v0, v0, v1, v1
232 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
234 ; FMAGFX11-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
236 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
238 ; FMAGFX11-NEXT: v_fma_f32 v0, v0, v1, v1
239 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
241 ; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
243 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1
245 ; FMAD-NEXT: s_setpc_b64 s[30:31]
247 ; FMADGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
248 ; FMADGFX10: ; %bb.0:
249 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
251 ; FMADGFX10-NEXT: v_mad_f32 v0, v0, v1, v1
252 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
253 %add = fadd fast float %arg0, 1.0
; Despite its name, %splat only defines lane 0; lane 1 is left undef.
254 %splat = insertelement <2 x float> undef, float %add, i32 0
255 %tmp1 = fmul fast <2 x float> %arg1, %splat
256 ret <2 x float> %tmp1
; Mixed scalar/vector case: the scalar (1.0 - %arg0) is inserted into lane 0
; of an otherwise-undef <2 x float>, which is then multiplied by %arg1. The
; assertions below expect a single instruction writing v0 only, with a negated
; %arg0 operand (-v0) for the fused prefixes. Unlike the other fsub tests in
; this file, the FMADGFX10 prefix expects v_mad_f32 here.
; NOTE(review): "ditribute" in the function name looks like a typo for
; "distribute", and the name lacks the "_f32" suffix used by the fadd sibling.
; It is left unchanged here because every check label matches this spelling.
259 define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
260 ; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
262 ; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
264 ; FMA-NEXT: s_setpc_b64 s[30:31]
266 ; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
268 ; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269 ; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0
270 ; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
271 ; NOFUSE-NEXT: s_setpc_b64 s[30:31]
273 ; FMAGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
275 ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
277 ; FMAGFX10-NEXT: v_fma_f32 v0, -v0, v1, v1
278 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
280 ; FMAGFX11-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
282 ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
284 ; FMAGFX11-NEXT: v_fma_f32 v0, -v0, v1, v1
285 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
287 ; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
289 ; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1
291 ; FMAD-NEXT: s_setpc_b64 s[30:31]
293 ; FMADGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
294 ; FMADGFX10: ; %bb.0:
295 ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
297 ; FMADGFX10-NEXT: v_mad_f32 v0, -v0, v1, v1
298 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
299 %sub = fsub fast float 1.0, %arg0
; Despite its name, %splat only defines lane 0; lane 1 is left undef.
300 %splat = insertelement <2 x float> undef, float %sub, i32 0
301 %tmp1 = fmul fast <2 x float> %arg1, %splat
302 ret <2 x float> %tmp1
; Attribute group referenced as #0 by every function visible in this file:
; sets both "no-infs-fp-math" and "unsafe-fp-math" to "true".
305 attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }