1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
; Baseline scalar f32 case: a plain llvm.fma.f32 call on three VGPR arguments
; is expected to select a single v_fma_f32 on every tested -mcpu.
8 define float @v_fma_f32(float %x, float %y, float %z) {
9 ; GFX6-LABEL: v_fma_f32:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
13 ; GFX6-NEXT: s_setpc_b64 s[30:31]
15 ; GFX8-LABEL: v_fma_f32:
17 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX8-NEXT: v_fma_f32 v0, v0, v1, v2
19 ; GFX8-NEXT: s_setpc_b64 s[30:31]
21 ; GFX9-LABEL: v_fma_f32:
23 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
25 ; GFX9-NEXT: s_setpc_b64 s[30:31]
27 ; GFX10-LABEL: v_fma_f32:
29 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
31 ; GFX10-NEXT: s_setpc_b64 s[30:31]
33 ; GFX11-LABEL: v_fma_f32:
35 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2
37 ; GFX11-NEXT: s_setpc_b64 s[30:31]
38 %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
; <2 x float> fma: expected to be split into two scalar v_fma_f32
; instructions, one per vector element, on every tested -mcpu.
42 define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
43 ; GFX6-LABEL: v_fma_v2f32:
45 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
47 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
48 ; GFX6-NEXT: s_setpc_b64 s[30:31]
50 ; GFX8-LABEL: v_fma_v2f32:
52 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53 ; GFX8-NEXT: v_fma_f32 v0, v0, v2, v4
54 ; GFX8-NEXT: v_fma_f32 v1, v1, v3, v5
55 ; GFX8-NEXT: s_setpc_b64 s[30:31]
57 ; GFX9-LABEL: v_fma_v2f32:
59 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX9-NEXT: v_fma_f32 v0, v0, v2, v4
61 ; GFX9-NEXT: v_fma_f32 v1, v1, v3, v5
62 ; GFX9-NEXT: s_setpc_b64 s[30:31]
64 ; GFX10-LABEL: v_fma_v2f32:
66 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
68 ; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
69 ; GFX10-NEXT: s_setpc_b64 s[30:31]
71 ; GFX11-LABEL: v_fma_v2f32:
73 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX11-NEXT: v_fma_f32 v0, v0, v2, v4
75 ; GFX11-NEXT: v_fma_f32 v1, v1, v3, v5
76 ; GFX11-NEXT: s_setpc_b64 s[30:31]
77 %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
; Scalar half fma. On tahiti the expected code converts all three operands to
; f32 (v_cvt_f32_f16), performs v_fma_f32 and converts the result back to f16.
; On fiji and the newer targets a single v_fma_f16 is expected.
81 define half @v_fma_f16(half %x, half %y, half %z) {
82 ; GFX6-LABEL: v_fma_f16:
84 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
86 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
87 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
88 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
89 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
90 ; GFX6-NEXT: s_setpc_b64 s[30:31]
92 ; GFX8-LABEL: v_fma_f16:
94 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
96 ; GFX8-NEXT: s_setpc_b64 s[30:31]
98 ; GFX9-LABEL: v_fma_f16:
100 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2
102 ; GFX9-NEXT: s_setpc_b64 s[30:31]
104 ; GFX10-LABEL: v_fma_f16:
106 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
108 ; GFX10-NEXT: s_setpc_b64 s[30:31]
110 ; GFX11-LABEL: v_fma_f16:
112 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
114 ; GFX11-NEXT: s_setpc_b64 s[30:31]
115 %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
; Half fma with fneg on the first multiplicand. The negation is expected to
; fold into a source modifier rather than a separate instruction: on tahiti it
; lands on the f16->f32 conversion of that operand (-v0), on fiji and newer it
; lands on the first source of v_fma_f16.
119 define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
120 ; GFX6-LABEL: v_fma_f16_fneg_lhs:
122 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX6-NEXT: v_cvt_f32_f16_e64 v0, -v0
124 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
125 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
126 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
127 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
128 ; GFX6-NEXT: s_setpc_b64 s[30:31]
130 ; GFX8-LABEL: v_fma_f16_fneg_lhs:
132 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX8-NEXT: v_fma_f16 v0, -v0, v1, v2
134 ; GFX8-NEXT: s_setpc_b64 s[30:31]
136 ; GFX9-LABEL: v_fma_f16_fneg_lhs:
138 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139 ; GFX9-NEXT: v_fma_f16 v0, -v0, v1, v2
140 ; GFX9-NEXT: s_setpc_b64 s[30:31]
142 ; GFX10-LABEL: v_fma_f16_fneg_lhs:
144 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
146 ; GFX10-NEXT: s_setpc_b64 s[30:31]
148 ; GFX11-LABEL: v_fma_f16_fneg_lhs:
150 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
152 ; GFX11-NEXT: s_setpc_b64 s[30:31]
153 %neg.x = fneg half %x
154 %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
; Half fma with fneg on the second multiplicand. The negation is expected to
; fold into a source modifier: on tahiti on the f16->f32 conversion of that
; operand (-v1), on fiji and newer on the second source of v_fma_f16.
158 define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
159 ; GFX6-LABEL: v_fma_f16_fneg_rhs:
161 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
163 ; GFX6-NEXT: v_cvt_f32_f16_e64 v1, -v1
164 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
165 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
166 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
167 ; GFX6-NEXT: s_setpc_b64 s[30:31]
169 ; GFX8-LABEL: v_fma_f16_fneg_rhs:
171 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX8-NEXT: v_fma_f16 v0, v0, -v1, v2
173 ; GFX8-NEXT: s_setpc_b64 s[30:31]
175 ; GFX9-LABEL: v_fma_f16_fneg_rhs:
177 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX9-NEXT: v_fma_f16 v0, v0, -v1, v2
179 ; GFX9-NEXT: s_setpc_b64 s[30:31]
181 ; GFX10-LABEL: v_fma_f16_fneg_rhs:
183 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
185 ; GFX10-NEXT: s_setpc_b64 s[30:31]
187 ; GFX11-LABEL: v_fma_f16_fneg_rhs:
189 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
191 ; GFX11-NEXT: s_setpc_b64 s[30:31]
192 %neg.y = fneg half %y
193 %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
; Half fma with fneg on the addend. The negation is expected to fold into a
; source modifier: on tahiti on the f16->f32 conversion of the addend (-v2),
; on fiji and newer on the third source of v_fma_f16.
197 define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
198 ; GFX6-LABEL: v_fma_f16_fneg_add:
200 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
202 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
203 ; GFX6-NEXT: v_cvt_f32_f16_e64 v2, -v2
204 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
205 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
206 ; GFX6-NEXT: s_setpc_b64 s[30:31]
208 ; GFX8-LABEL: v_fma_f16_fneg_add:
210 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, -v2
212 ; GFX8-NEXT: s_setpc_b64 s[30:31]
214 ; GFX9-LABEL: v_fma_f16_fneg_add:
216 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX9-NEXT: v_fma_f16 v0, v0, v1, -v2
218 ; GFX9-NEXT: s_setpc_b64 s[30:31]
220 ; GFX10-LABEL: v_fma_f16_fneg_add:
222 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
224 ; GFX10-NEXT: s_setpc_b64 s[30:31]
226 ; GFX11-LABEL: v_fma_f16_fneg_add:
228 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
230 ; GFX11-NEXT: s_setpc_b64 s[30:31]
231 %neg.z = fneg half %z
232 %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
; <2 x half> fma, three different expected lowerings:
;  - tahiti: each element is converted to f32, fused with v_fma_f32 and
;    converted back to f16.
;  - fiji: the high halves are extracted with 16-bit right shifts, two
;    v_fma_f16 are issued, and the results are repacked with shift + or.
;  - gfx900 and newer: a single packed v_pk_fma_f16.
236 define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
237 ; GFX6-LABEL: v_fma_v2f16:
239 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
241 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
242 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
243 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
244 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
245 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
246 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
247 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
248 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
249 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
250 ; GFX6-NEXT: s_setpc_b64 s[30:31]
252 ; GFX8-LABEL: v_fma_v2f16:
254 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
256 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
257 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
258 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
259 ; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
260 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
261 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
262 ; GFX8-NEXT: s_setpc_b64 s[30:31]
264 ; GFX9-LABEL: v_fma_v2f16:
266 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2
268 ; GFX9-NEXT: s_setpc_b64 s[30:31]
270 ; GFX10-LABEL: v_fma_v2f16:
272 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
274 ; GFX10-NEXT: s_setpc_b64 s[30:31]
276 ; GFX11-LABEL: v_fma_v2f16:
278 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
280 ; GFX11-NEXT: s_setpc_b64 s[30:31]
281 %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z)
; <2 x half> fma with a vector fneg on the first multiplicand.
;  - tahiti: the two halves of %x are packed into one register, both sign
;    bits are flipped with a single xor by 0x80008000, then the value is
;    unpacked again and the usual per-element f32 sequence follows.
;  - fiji: the same xor is applied to the packed operand before the
;    scalarized v_fma_f16 sequence; the negation is not folded.
;  - gfx900 and newer: the negation folds into v_pk_fma_f16 as neg_lo/neg_hi
;    on operand 0.
285 define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
286 ; GFX6-LABEL: v_fma_v2f16_fneg_lhs:
288 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
290 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
291 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
292 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
293 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
294 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
295 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
296 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
297 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
298 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
299 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
300 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
301 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
302 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
303 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
304 ; GFX6-NEXT: s_setpc_b64 s[30:31]
306 ; GFX8-LABEL: v_fma_v2f16_fneg_lhs:
308 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
310 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
311 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
312 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
313 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
314 ; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
315 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
316 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
317 ; GFX8-NEXT: s_setpc_b64 s[30:31]
319 ; GFX9-LABEL: v_fma_v2f16_fneg_lhs:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
323 ; GFX9-NEXT: s_setpc_b64 s[30:31]
325 ; GFX10-LABEL: v_fma_v2f16_fneg_lhs:
327 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
329 ; GFX10-NEXT: s_setpc_b64 s[30:31]
331 ; GFX11-LABEL: v_fma_v2f16_fneg_lhs:
333 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
335 ; GFX11-NEXT: s_setpc_b64 s[30:31]
336 %x.fneg = fneg <2 x half> %x
337 %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y, <2 x half> %z)
; <2 x half> fma with a vector fneg on the second multiplicand.
;  - tahiti: the two halves of %y are packed, both sign bits are flipped with
;    one xor by 0x80008000, then unpacked for the per-element f32 sequence.
;  - fiji: the same xor is applied to the packed %y register before the
;    scalarized v_fma_f16 sequence; the negation is not folded.
;  - gfx900 and newer: the negation folds into v_pk_fma_f16 as neg_lo/neg_hi
;    on operand 1.
341 define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
342 ; GFX6-LABEL: v_fma_v2f16_fneg_rhs:
344 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
346 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
347 ; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
348 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
349 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
350 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
351 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
352 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
353 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
354 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
355 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
356 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
357 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
358 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
359 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
360 ; GFX6-NEXT: s_setpc_b64 s[30:31]
362 ; GFX8-LABEL: v_fma_v2f16_fneg_rhs:
364 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
366 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
367 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
368 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
369 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
370 ; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
371 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
372 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
373 ; GFX8-NEXT: s_setpc_b64 s[30:31]
375 ; GFX9-LABEL: v_fma_v2f16_fneg_rhs:
377 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
379 ; GFX9-NEXT: s_setpc_b64 s[30:31]
381 ; GFX10-LABEL: v_fma_v2f16_fneg_rhs:
383 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
385 ; GFX10-NEXT: s_setpc_b64 s[30:31]
387 ; GFX11-LABEL: v_fma_v2f16_fneg_rhs:
389 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
391 ; GFX11-NEXT: s_setpc_b64 s[30:31]
392 %y.fneg = fneg <2 x half> %y
393 %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y.fneg, <2 x half> %z)
; <2 x half> fma with fneg on both multiplicands. The two negations cancel in
; the product, and the expected code on every target contains no xor and no
; neg_lo/neg_hi modifiers: it is the same instruction sequence expected for
; the un-negated <2 x half> fma.
397 define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
398 ; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs:
400 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
402 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
403 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
404 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
405 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
406 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
407 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
408 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
409 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
410 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
411 ; GFX6-NEXT: s_setpc_b64 s[30:31]
413 ; GFX8-LABEL: v_fma_v2f16_fneg_lhs_rhs:
415 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
417 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
418 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
419 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
420 ; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
421 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
422 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
423 ; GFX8-NEXT: s_setpc_b64 s[30:31]
425 ; GFX9-LABEL: v_fma_v2f16_fneg_lhs_rhs:
427 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2
429 ; GFX9-NEXT: s_setpc_b64 s[30:31]
431 ; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs:
433 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
435 ; GFX10-NEXT: s_setpc_b64 s[30:31]
437 ; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs:
439 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
441 ; GFX11-NEXT: s_setpc_b64 s[30:31]
442 %x.fneg = fneg <2 x half> %x
443 %y.fneg = fneg <2 x half> %y
444 %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg, <2 x half> %z)
; <3 x half> fma (odd element count).
;  - tahiti: three independent f32 fmas, each with conversions to and from
;    f16.
;  - fiji: three v_fma_f16; the two results from the first register are
;    repacked with shift + or, the third stays alone in v1.
;  - gfx900 and newer: two v_pk_fma_f16, one per 32-bit register.
448 define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
449 ; GFX6-LABEL: v_fma_v3f16:
451 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
453 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
454 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v6
455 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
456 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
457 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
458 ; GFX6-NEXT: v_fma_f32 v0, v0, v3, v6
459 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v4
460 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v7
461 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v8
462 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
463 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v4
464 ; GFX6-NEXT: v_fma_f32 v2, v2, v5, v6
465 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
466 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
467 ; GFX6-NEXT: s_setpc_b64 s[30:31]
469 ; GFX8-LABEL: v_fma_v3f16:
471 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
473 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v2
474 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
475 ; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
476 ; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8
477 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
478 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
479 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
480 ; GFX8-NEXT: s_setpc_b64 s[30:31]
482 ; GFX9-LABEL: v_fma_v3f16:
484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
486 ; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
487 ; GFX9-NEXT: s_setpc_b64 s[30:31]
489 ; GFX10-LABEL: v_fma_v3f16:
491 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
493 ; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
494 ; GFX10-NEXT: s_setpc_b64 s[30:31]
496 ; GFX11-LABEL: v_fma_v3f16:
498 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
500 ; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
501 ; GFX11-NEXT: s_setpc_b64 s[30:31]
502 %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z)
; <4 x half> fma.
;  - tahiti: four independent f32 fmas, each with conversions to and from
;    f16.
;  - fiji: high halves are extracted with right shifts, four v_fma_f16 are
;    issued, and each pair of results is repacked with shift + or.
;  - gfx900 and newer: two v_pk_fma_f16, one per 32-bit register.
506 define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
507 ; GFX6-LABEL: v_fma_v4f16:
509 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
511 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
512 ; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v8
513 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
514 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
515 ; GFX6-NEXT: v_cvt_f32_f16_e32 v9, v9
516 ; GFX6-NEXT: v_fma_f32 v0, v0, v4, v8
517 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
518 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v6
519 ; GFX6-NEXT: v_fma_f32 v1, v1, v5, v9
520 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v10
521 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
522 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v7
523 ; GFX6-NEXT: v_cvt_f32_f16_e32 v7, v11
524 ; GFX6-NEXT: v_fma_f32 v2, v2, v4, v5
525 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
526 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
527 ; GFX6-NEXT: v_fma_f32 v3, v3, v6, v7
528 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
529 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
530 ; GFX6-NEXT: s_setpc_b64 s[30:31]
532 ; GFX8-LABEL: v_fma_v4f16:
534 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
536 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2
537 ; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4
538 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1
539 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
540 ; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
541 ; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
542 ; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10
543 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
544 ; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11
545 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
546 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
547 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
548 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
549 ; GFX8-NEXT: s_setpc_b64 s[30:31]
551 ; GFX9-LABEL: v_fma_v4f16:
553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
555 ; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
556 ; GFX9-NEXT: s_setpc_b64 s[30:31]
558 ; GFX10-LABEL: v_fma_v4f16:
560 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
562 ; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
563 ; GFX10-NEXT: s_setpc_b64 s[30:31]
565 ; GFX11-LABEL: v_fma_v4f16:
567 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
569 ; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
570 ; GFX11-NEXT: s_setpc_b64 s[30:31]
571 %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z)
; Baseline scalar f64 case: expected to select a single v_fma_f64 operating on
; 64-bit VGPR pairs on every tested -mcpu.
575 define double @v_fma_f64(double %x, double %y, double %z) {
576 ; GFX6-LABEL: v_fma_f64:
578 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
580 ; GFX6-NEXT: s_setpc_b64 s[30:31]
582 ; GFX8-LABEL: v_fma_f64:
584 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
586 ; GFX8-NEXT: s_setpc_b64 s[30:31]
588 ; GFX9-LABEL: v_fma_f64:
590 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
592 ; GFX9-NEXT: s_setpc_b64 s[30:31]
594 ; GFX10-LABEL: v_fma_f64:
596 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
598 ; GFX10-NEXT: s_setpc_b64 s[30:31]
600 ; GFX11-LABEL: v_fma_f64:
602 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
604 ; GFX11-NEXT: s_setpc_b64 s[30:31]
605 %fma = call double @llvm.fma.f64(double %x, double %y, double %z)
; f64 fma with fneg on all three operands. The negations on the two
; multiplicands cancel in the product, so the expected v_fma_f64 carries a
; negate modifier only on the addend (-v[4:5]).
609 define double @v_fma_f64_fneg_all(double %x, double %y, double %z) {
610 ; GFX6-LABEL: v_fma_f64_fneg_all:
612 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613 ; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
614 ; GFX6-NEXT: s_setpc_b64 s[30:31]
616 ; GFX8-LABEL: v_fma_f64_fneg_all:
618 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
620 ; GFX8-NEXT: s_setpc_b64 s[30:31]
622 ; GFX9-LABEL: v_fma_f64_fneg_all:
624 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625 ; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
626 ; GFX9-NEXT: s_setpc_b64 s[30:31]
628 ; GFX10-LABEL: v_fma_f64_fneg_all:
630 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
632 ; GFX10-NEXT: s_setpc_b64 s[30:31]
634 ; GFX11-LABEL: v_fma_f64_fneg_all:
636 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
638 ; GFX11-NEXT: s_setpc_b64 s[30:31]
639 %neg.x = fneg double %x
640 %neg.y = fneg double %y
641 %neg.z = fneg double %z
642 %fma = call double @llvm.fma.f64(double %neg.x, double %neg.y, double %neg.z)
; <2 x double> fma: expected to be split into two v_fma_f64 instructions, one
; per element, each on 64-bit VGPR pairs, on every tested -mcpu.
646 define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
647 ; GFX6-LABEL: v_fma_v2f64:
649 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650 ; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
651 ; GFX6-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
652 ; GFX6-NEXT: s_setpc_b64 s[30:31]
654 ; GFX8-LABEL: v_fma_v2f64:
656 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
658 ; GFX8-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
659 ; GFX8-NEXT: s_setpc_b64 s[30:31]
661 ; GFX9-LABEL: v_fma_v2f64:
663 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664 ; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
665 ; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
666 ; GFX9-NEXT: s_setpc_b64 s[30:31]
668 ; GFX10-LABEL: v_fma_v2f64:
670 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
672 ; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
673 ; GFX10-NEXT: s_setpc_b64 s[30:31]
675 ; GFX11-LABEL: v_fma_v2f64:
677 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
679 ; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
680 ; GFX11-NEXT: s_setpc_b64 s[30:31]
681 %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z)
682 ret <2 x double> %fma
; f32 fma with llvm.fabs on the first multiplicand: the fabs is expected to
; fold into an absolute-value source modifier (|v0|) on v_fma_f32 on every
; tested -mcpu.
685 define float @v_fma_f32_fabs_lhs(float %x, float %y, float %z) {
686 ; GFX6-LABEL: v_fma_f32_fabs_lhs:
688 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689 ; GFX6-NEXT: v_fma_f32 v0, |v0|, v1, v2
690 ; GFX6-NEXT: s_setpc_b64 s[30:31]
692 ; GFX8-LABEL: v_fma_f32_fabs_lhs:
694 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695 ; GFX8-NEXT: v_fma_f32 v0, |v0|, v1, v2
696 ; GFX8-NEXT: s_setpc_b64 s[30:31]
698 ; GFX9-LABEL: v_fma_f32_fabs_lhs:
700 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701 ; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, v2
702 ; GFX9-NEXT: s_setpc_b64 s[30:31]
704 ; GFX10-LABEL: v_fma_f32_fabs_lhs:
706 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707 ; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2
708 ; GFX10-NEXT: s_setpc_b64 s[30:31]
710 ; GFX11-LABEL: v_fma_f32_fabs_lhs:
712 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713 ; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2
714 ; GFX11-NEXT: s_setpc_b64 s[30:31]
715 %fabs.x = call float @llvm.fabs.f32(float %x)
716 %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
; f32 fma with llvm.fabs on the second multiplicand: the fabs is expected to
; fold into an absolute-value source modifier (|v1|) on v_fma_f32 on every
; tested -mcpu.
720 define float @v_fma_f32_fabs_rhs(float %x, float %y, float %z) {
721 ; GFX6-LABEL: v_fma_f32_fabs_rhs:
723 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX6-NEXT: v_fma_f32 v0, v0, |v1|, v2
725 ; GFX6-NEXT: s_setpc_b64 s[30:31]
727 ; GFX8-LABEL: v_fma_f32_fabs_rhs:
729 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX8-NEXT: v_fma_f32 v0, v0, |v1|, v2
731 ; GFX8-NEXT: s_setpc_b64 s[30:31]
733 ; GFX9-LABEL: v_fma_f32_fabs_rhs:
735 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GFX9-NEXT: v_fma_f32 v0, v0, |v1|, v2
737 ; GFX9-NEXT: s_setpc_b64 s[30:31]
739 ; GFX10-LABEL: v_fma_f32_fabs_rhs:
741 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742 ; GFX10-NEXT: v_fma_f32 v0, v0, |v1|, v2
743 ; GFX10-NEXT: s_setpc_b64 s[30:31]
745 ; GFX11-LABEL: v_fma_f32_fabs_rhs:
747 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748 ; GFX11-NEXT: v_fma_f32 v0, v0, |v1|, v2
749 ; GFX11-NEXT: s_setpc_b64 s[30:31]
750 %fabs.y = call float @llvm.fabs.f32(float %y)
751 %fma = call float @llvm.fma.f32(float %x, float %fabs.y, float %z)
; f32 fma with llvm.fabs on both multiplicands: both are expected to fold into
; absolute-value source modifiers (|v0|, |v1|) on a single v_fma_f32 on every
; tested -mcpu.
755 define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, float %z) {
756 ; GFX6-LABEL: v_fma_f32_fabs_lhs_rhs:
758 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX6-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
760 ; GFX6-NEXT: s_setpc_b64 s[30:31]
762 ; GFX8-LABEL: v_fma_f32_fabs_lhs_rhs:
764 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
765 ; GFX8-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
766 ; GFX8-NEXT: s_setpc_b64 s[30:31]
768 ; GFX9-LABEL: v_fma_f32_fabs_lhs_rhs:
770 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; GFX9-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
772 ; GFX9-NEXT: s_setpc_b64 s[30:31]
774 ; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs:
776 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777 ; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
778 ; GFX10-NEXT: s_setpc_b64 s[30:31]
780 ; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs:
782 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783 ; GFX11-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
784 ; GFX11-NEXT: s_setpc_b64 s[30:31]
785 %fabs.x = call float @llvm.fabs.f32(float %x)
786 %fabs.y = call float @llvm.fabs.f32(float %y)
787 %fma = call float @llvm.fma.f32(float %fabs.x, float %fabs.y, float %z)
; amdgpu_ps function whose first multiplicand is an inreg (SGPR) argument and
; whose other operands are VGPRs. The SGPR (s0) is expected to be used
; directly as the first source of v_fma_f32, with no copy to a VGPR, on every
; tested -mcpu.
791 define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float inreg %x, float %y, float %z) {
792 ; GFX6-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
794 ; GFX6-NEXT: v_fma_f32 v0, s0, v0, v1
795 ; GFX6-NEXT: ; return to shader part epilog
797 ; GFX8-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
799 ; GFX8-NEXT: v_fma_f32 v0, s0, v0, v1
800 ; GFX8-NEXT: ; return to shader part epilog
802 ; GFX9-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
804 ; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1
805 ; GFX9-NEXT: ; return to shader part epilog
807 ; GFX10-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
809 ; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
810 ; GFX10-NEXT: ; return to shader part epilog
812 ; GFX11-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
814 ; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
815 ; GFX11-NEXT: ; return to shader part epilog
816 %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
; amdgpu_ps function whose second multiplicand is an inreg (SGPR) argument.
; The SGPR is expected to be used directly, with no copy to a VGPR. On tahiti,
; fiji and gfx900 the operand order is kept (v0, s0, v1); on gfx1010 and
; gfx1100 the two multiplicands appear swapped (s0, v0, v1).
820 define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float %x, float inreg %y, float %z) {
821 ; GFX6-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
823 ; GFX6-NEXT: v_fma_f32 v0, v0, s0, v1
824 ; GFX6-NEXT: ; return to shader part epilog
826 ; GFX8-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
828 ; GFX8-NEXT: v_fma_f32 v0, v0, s0, v1
829 ; GFX8-NEXT: ; return to shader part epilog
831 ; GFX9-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
833 ; GFX9-NEXT: v_fma_f32 v0, v0, s0, v1
834 ; GFX9-NEXT: ; return to shader part epilog
836 ; GFX10-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
838 ; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
839 ; GFX10-NEXT: ; return to shader part epilog
841 ; GFX11-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
843 ; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
844 ; GFX11-NEXT: ; return to shader part epilog
845 %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
; amdgpu_ps function where all three fma operands are inreg (SGPR) arguments.
;  - tahiti, fiji, gfx900: s1 and s2 are first copied to VGPRs with v_mov_b32
;    and only s0 is read directly by v_fma_f32.
;  - gfx1010, gfx1100: only s2 is copied; s1 and s0 are both read directly
;    (and appear in swapped order).
; NOTE(review): the difference presumably reflects how many SGPR sources a
; single VALU instruction may read on each target; confirm against the ISA
; documentation.
849 define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float inreg %x, float inreg %y, float inreg %z) {
850 ; GFX6-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
852 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
853 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
854 ; GFX6-NEXT: v_fma_f32 v0, s0, v0, v1
855 ; GFX6-NEXT: ; return to shader part epilog
857 ; GFX8-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
859 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
860 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
861 ; GFX8-NEXT: v_fma_f32 v0, s0, v0, v1
862 ; GFX8-NEXT: ; return to shader part epilog
864 ; GFX9-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
866 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
867 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
868 ; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1
869 ; GFX9-NEXT: ; return to shader part epilog
871 ; GFX10-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
873 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
874 ; GFX10-NEXT: v_fma_f32 v0, s1, s0, v0
875 ; GFX10-NEXT: ; return to shader part epilog
877 ; GFX11-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
879 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
880 ; GFX11-NEXT: v_fma_f32 v0, s1, s0, v0
881 ; GFX11-NEXT: ; return to shader part epilog
882 %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
; f32 fma with fneg on the first multiplicand: the negation is expected to
; fold into a negate source modifier (-v0) on v_fma_f32 on every tested -mcpu.
886 define float @v_fma_f32_fneg_lhs(float %x, float %y, float %z) {
887 ; GFX6-LABEL: v_fma_f32_fneg_lhs:
889 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; GFX6-NEXT: v_fma_f32 v0, -v0, v1, v2
891 ; GFX6-NEXT: s_setpc_b64 s[30:31]
893 ; GFX8-LABEL: v_fma_f32_fneg_lhs:
895 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896 ; GFX8-NEXT: v_fma_f32 v0, -v0, v1, v2
897 ; GFX8-NEXT: s_setpc_b64 s[30:31]
899 ; GFX9-LABEL: v_fma_f32_fneg_lhs:
901 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902 ; GFX9-NEXT: v_fma_f32 v0, -v0, v1, v2
903 ; GFX9-NEXT: s_setpc_b64 s[30:31]
905 ; GFX10-LABEL: v_fma_f32_fneg_lhs:
907 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
908 ; GFX10-NEXT: v_fma_f32 v0, -v0, v1, v2
909 ; GFX10-NEXT: s_setpc_b64 s[30:31]
911 ; GFX11-LABEL: v_fma_f32_fneg_lhs:
913 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914 ; GFX11-NEXT: v_fma_f32 v0, -v0, v1, v2
915 ; GFX11-NEXT: s_setpc_b64 s[30:31]
916 %neg.x = fneg float %x
917 %fma = call float @llvm.fma.f32(float %neg.x, float %y, float %z)
; f32 fma with fneg on the second multiplicand: the negation is expected to
; fold into a negate source modifier (-v1) on v_fma_f32 on every tested -mcpu.
921 define float @v_fma_f32_fneg_rhs(float %x, float %y, float %z) {
922 ; GFX6-LABEL: v_fma_f32_fneg_rhs:
924 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925 ; GFX6-NEXT: v_fma_f32 v0, v0, -v1, v2
926 ; GFX6-NEXT: s_setpc_b64 s[30:31]
928 ; GFX8-LABEL: v_fma_f32_fneg_rhs:
930 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931 ; GFX8-NEXT: v_fma_f32 v0, v0, -v1, v2
932 ; GFX8-NEXT: s_setpc_b64 s[30:31]
934 ; GFX9-LABEL: v_fma_f32_fneg_rhs:
936 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX9-NEXT: v_fma_f32 v0, v0, -v1, v2
938 ; GFX9-NEXT: s_setpc_b64 s[30:31]
940 ; GFX10-LABEL: v_fma_f32_fneg_rhs:
942 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943 ; GFX10-NEXT: v_fma_f32 v0, v0, -v1, v2
944 ; GFX10-NEXT: s_setpc_b64 s[30:31]
946 ; GFX11-LABEL: v_fma_f32_fneg_rhs:
948 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949 ; GFX11-NEXT: v_fma_f32 v0, v0, -v1, v2
950 ; GFX11-NEXT: s_setpc_b64 s[30:31]
951 %neg.y = fneg float %y
952 %fma = call float @llvm.fma.f32(float %x, float %neg.y, float %z)
; f32 fma with fneg on the addend: the negation is expected to fold into a
; negate source modifier (-v2) on v_fma_f32 on every tested -mcpu.
956 define float @v_fma_f32_fneg_z(float %x, float %y, float %z) {
957 ; GFX6-LABEL: v_fma_f32_fneg_z:
959 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, -v2
961 ; GFX6-NEXT: s_setpc_b64 s[30:31]
963 ; GFX8-LABEL: v_fma_f32_fneg_z:
965 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX8-NEXT: v_fma_f32 v0, v0, v1, -v2
967 ; GFX8-NEXT: s_setpc_b64 s[30:31]
969 ; GFX9-LABEL: v_fma_f32_fneg_z:
971 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; GFX9-NEXT: v_fma_f32 v0, v0, v1, -v2
973 ; GFX9-NEXT: s_setpc_b64 s[30:31]
975 ; GFX10-LABEL: v_fma_f32_fneg_z:
977 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2
979 ; GFX10-NEXT: s_setpc_b64 s[30:31]
981 ; GFX11-LABEL: v_fma_f32_fneg_z:
983 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX11-NEXT: v_fma_f32 v0, v0, v1, -v2
985 ; GFX11-NEXT: s_setpc_b64 s[30:31]
986 %neg.z = fneg float %z
987 %fma = call float @llvm.fma.f32(float %x, float %y, float %neg.z)
; amdgpu_ps function feeding llvm.fabs (called with the `contract` flag) of an
; inreg SGPR argument into llvm.fma.f32. The expected result on every tested
; -mcpu is one v_fma_f32 with the fabs folded as |s0|.
; NOTE(review): going by the function name this is a regression test for a
; crash after an attempted fma_mix selection; the original failure is not
; visible in this file, so confirm against the commit that added it.
991 define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) {
992 ; GFX6-LABEL: dont_crash_after_fma_mix_select_attempt:
993 ; GFX6: ; %bb.0: ; %.entry
994 ; GFX6-NEXT: v_fma_f32 v0, |s0|, v0, v1
995 ; GFX6-NEXT: ; return to shader part epilog
997 ; GFX8-LABEL: dont_crash_after_fma_mix_select_attempt:
998 ; GFX8: ; %bb.0: ; %.entry
999 ; GFX8-NEXT: v_fma_f32 v0, |s0|, v0, v1
1000 ; GFX8-NEXT: ; return to shader part epilog
1002 ; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt:
1003 ; GFX9: ; %bb.0: ; %.entry
1004 ; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1
1005 ; GFX9-NEXT: ; return to shader part epilog
1007 ; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt:
1008 ; GFX10: ; %bb.0: ; %.entry
1009 ; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1
1010 ; GFX10-NEXT: ; return to shader part epilog
1012 ; GFX11-LABEL: dont_crash_after_fma_mix_select_attempt:
1013 ; GFX11: ; %bb.0: ; %.entry
1014 ; GFX11-NEXT: v_fma_f32 v0, |s0|, v0, v1
1015 ; GFX11-NEXT: ; return to shader part epilog
1017 %fabs.x = call contract float @llvm.fabs.f32(float %x)
1018 %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
; Scalar fma intrinsics called by the test functions in this file.
1022 declare half @llvm.fma.f16(half, half, half) #0
1023 declare float @llvm.fma.f32(float, float, float) #0
1024 declare double @llvm.fma.f64(double, double, double) #0
; fabs intrinsics. NOTE(review): no call to llvm.fabs.f16 is visible in the
; portion of the file reviewed here; only llvm.fabs.f32 is used.
1026 declare half @llvm.fabs.f16(half) #0
1027 declare float @llvm.fabs.f32(float) #0
; Two-element vector fma intrinsics.
1029 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
1030 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0
1031 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; Three- and four-element half vector fma intrinsics.
1033 declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #0
1034 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
; Attribute group shared by every intrinsic declaration above.
1036 attributes #0 = { nounwind readnone speculatable willreturn }