1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
9 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
10 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
11 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
12 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
13 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
14 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
; Baseline case: all three fmuladd operands are half arguments fpext'ed to
; float. The checks expect a single v_mad_mix_f32 (gfx900) or v_fma_mix_f32
; (gfx906, gfx1100) with op_sel_hi:[1,1,1]; the other targets are checked for a
; plain f32 v_mac_f32/v_mad_f32.
16 define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
17 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
19 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
21 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
23 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
25 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
27 ; GFX900-NEXT: s_setpc_b64 s[30:31]
29 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
31 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
33 ; GFX906-NEXT: s_setpc_b64 s[30:31]
35 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
37 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
39 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
40 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
41 ; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
42 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
44 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
46 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
48 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
49 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
50 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
51 ; VI-NEXT: s_setpc_b64 s[30:31]
53 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
55 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
57 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
59 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
61 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
63 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
64 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
65 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
66 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
67 %src0.ext = fpext half %src0 to float
68 %src1.ext = fpext half %src1 to float
69 %src2.ext = fpext half %src2 to float
70 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Each operand is the upper 16 bits of an i32 argument (lshr 16, trunc to i16,
; bitcast to half) before being fpext'ed and fed to fmuladd. On gfx900, gfx906
; and gfx1100 the checks expect the extraction to fold into the mix
; instruction as op_sel:[1,1,1].
74 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
75 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
77 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
79 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
81 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
83 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
85 ; GFX900-NEXT: s_setpc_b64 s[30:31]
87 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
89 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
91 ; GFX906-NEXT: s_setpc_b64 s[30:31]
93 ; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
95 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
97 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
98 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
99 ; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
100 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
102 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
104 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
106 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
107 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
108 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
109 ; VI-NEXT: s_setpc_b64 s[30:31]
111 ; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
113 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
115 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
116 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
117 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v0
118 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
119 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v2
120 ; CI-NEXT: v_mac_f32_e32 v0, v3, v1
121 ; CI-NEXT: s_setpc_b64 s[30:31]
; Move bits [31:16] of each i32 down to bits [15:0].
122 %src0.hi = lshr i32 %src0, 16
123 %src1.hi = lshr i32 %src1, 16
124 %src2.hi = lshr i32 %src2, 16
125 %src0.i16 = trunc i32 %src0.hi to i16
126 %src1.i16 = trunc i32 %src1.hi to i16
127 %src2.i16 = trunc i32 %src2.hi to i16
; Reinterpret the 16-bit integers as half values.
128 %src0.fp16 = bitcast i16 %src0.i16 to half
129 %src1.fp16 = bitcast i16 %src1.i16 to half
130 %src2.fp16 = bitcast i16 %src2.i16 to half
131 %src0.ext = fpext half %src0.fp16 to float
132 %src1.ext = fpext half %src1.fp16 to float
133 %src2.ext = fpext half %src2.fp16 to float
134 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Same computation as the integer-shift variant, but each operand is element 1
; of a <2 x half> argument obtained with extractelement. The gfx900, gfx906 and
; gfx1100 checks again expect op_sel:[1,1,1] on the mix instruction.
138 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
139 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
141 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
143 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
145 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
147 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
149 ; GFX900-NEXT: s_setpc_b64 s[30:31]
151 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
153 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
155 ; GFX906-NEXT: s_setpc_b64 s[30:31]
157 ; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
159 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
161 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
162 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
163 ; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
164 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
166 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
168 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
170 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
171 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
172 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
173 ; VI-NEXT: s_setpc_b64 s[30:31]
175 ; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
177 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5
179 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
181 ; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
183 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
185 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
186 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5
187 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2
188 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Take element 1 of each <2 x half> operand.
189 %src0.hi = extractelement <2 x half> %src0, i32 1
190 %src1.hi = extractelement <2 x half> %src1, i32 1
191 %src2.hi = extractelement <2 x half> %src2, i32 1
192 %src0.ext = fpext half %src0.hi to float
193 %src1.ext = fpext half %src1.hi to float
194 %src2.ext = fpext half %src2.hi to float
195 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Vector form: three <2 x half> operands are fpext'ed to <2 x float> and passed
; to llvm.fmuladd.v2f32. On gfx900, gfx906 and gfx1100 the checks expect two mix
; instructions, one without op_sel and one with op_sel:[1,1,1]. SelectionDAG and
; GlobalISel emit them in a different order on gfx900/gfx906, hence the separate
; SDAG-/GISEL- prefixes.
199 define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
200 ; GFX1100-LABEL: v_mad_mix_v2f32:
202 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
204 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
205 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
206 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
207 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
209 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
210 ; SDAG-GFX900: ; %bb.0:
211 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
213 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
214 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
215 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
217 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
218 ; SDAG-GFX906: ; %bb.0:
219 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
221 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
222 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
223 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
225 ; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
226 ; SDAG-GFX9GEN: ; %bb.0:
227 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
229 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
230 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
231 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1
232 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
233 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
234 ; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5
235 ; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6
236 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
238 ; SDAG-VI-LABEL: v_mad_mix_v2f32:
240 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
242 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0
243 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
244 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1
245 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
246 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
247 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5
248 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6
249 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
251 ; SDAG-CI-LABEL: v_mad_mix_v2f32:
253 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
255 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3
256 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
257 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
258 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5
259 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6
260 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
261 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0
262 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
263 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
264 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
265 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6
266 ; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5
267 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3
268 ; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
269 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
271 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
272 ; GISEL-GFX900: ; %bb.0:
273 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
275 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
276 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
277 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
279 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
280 ; GISEL-GFX906: ; %bb.0:
281 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
283 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
284 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
285 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
287 ; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
288 ; GISEL-GFX9GEN: ; %bb.0:
289 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
291 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
292 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1
293 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
294 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
295 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
296 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5
297 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6
298 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
300 ; GISEL-VI-LABEL: v_mad_mix_v2f32:
302 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
304 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
305 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1
306 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
307 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
308 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
309 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5
310 ; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6
311 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
313 ; GISEL-CI-LABEL: v_mad_mix_v2f32:
315 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0
317 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1
318 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
319 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
320 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
321 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5
322 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2
323 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3
324 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
325 %src0.ext = fpext <2 x half> %src0 to <2 x float>
326 %src1.ext = fpext <2 x half> %src1 to <2 x float>
327 %src2.ext = fpext <2 x half> %src2 to <2 x float>
328 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
329 ret <2 x float> %result
; Vector form with each <2 x half> operand permuted before the fpext:
;   src0 uses mask <1, 0> (elements swapped),
;   src1 uses mask <0, 1> (identity),
;   src2 uses mask <1, 1> (element 1 in both lanes).
; On gfx900, gfx906 and gfx1100 the checks expect the permutations to fold into
; the op_sel operands of the two mix instructions (op_sel:[1,0,1] and
; op_sel:[0,1,1]).
332 define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
333 ; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
335 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
337 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
338 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
339 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
340 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
342 ; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
344 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
346 ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
347 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
348 ; GFX900-NEXT: s_setpc_b64 s[30:31]
350 ; GFX906-LABEL: v_mad_mix_v2f32_shuffle:
352 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
354 ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
355 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
356 ; GFX906-NEXT: s_setpc_b64 s[30:31]
358 ; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
360 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
362 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
363 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
364 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
365 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
366 ; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2
367 ; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1
368 ; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2
369 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
371 ; VI-LABEL: v_mad_mix_v2f32_shuffle:
373 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
375 ; VI-NEXT: v_cvt_f32_f16_e32 v4, v0
376 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v1
377 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
378 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
379 ; VI-NEXT: v_mad_f32 v0, v3, v0, v2
380 ; VI-NEXT: v_mac_f32_e32 v2, v4, v1
381 ; VI-NEXT: v_mov_b32_e32 v1, v2
382 ; VI-NEXT: s_setpc_b64 s[30:31]
384 ; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
386 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
388 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
389 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
390 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
391 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
392 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
393 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
394 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v0
395 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v2, v5
396 ; SDAG-CI-NEXT: v_mad_f32 v1, v4, v3, v5
397 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
399 ; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
401 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
403 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
404 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
405 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5
406 ; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
407 ; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4
408 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
409 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
410 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
411 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
412 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
413 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
414 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
415 ; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
416 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
417 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; The second shuffle operand is never selected by any mask index, so it is
; written as poison rather than the deprecated undef.
418 %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
419 %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
420 %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
421 %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
422 %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
423 %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
424 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
425 ret <2 x float> %result
; src0 is negated (fneg) after the fpext. The gfx900, gfx906 and gfx1100 checks
; expect the negation to appear as a "-v0" source modifier on the mix
; instruction. Targets without the mix instruction differ between SelectionDAG
; (neg on v_mad_f32) and GlobalISel (neg on v_cvt_f32_f16_e64).
428 define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
429 ; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
431 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
433 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
435 ; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
437 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
439 ; GFX900-NEXT: s_setpc_b64 s[30:31]
441 ; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
443 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
445 ; GFX906-NEXT: s_setpc_b64 s[30:31]
447 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
448 ; SDAG-GFX9GEN: ; %bb.0:
449 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
451 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
452 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
453 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
454 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
456 ; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
458 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
460 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
461 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
462 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
463 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
465 ; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
467 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
469 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
471 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
472 ; GISEL-GFX9GEN: ; %bb.0:
473 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
475 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
476 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
477 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
478 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
480 ; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
482 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
484 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
485 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
486 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
487 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
489 ; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
491 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
493 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
494 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
495 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
496 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
497 %src0.ext = fpext half %src0 to float
498 %src1.ext = fpext half %src1 to float
499 %src2.ext = fpext half %src2 to float
; The negation is applied to the already-extended f32 value.
500 %src0.ext.neg = fneg float %src0.ext
501 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
; src0 goes through llvm.fabs.f32 after the fpext. The gfx900, gfx906 and
; gfx1100 checks expect the abs to appear as a "|v0|" source modifier on the mix
; instruction; the other targets are checked for |v0| on v_mad_f32.
505 define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
506 ; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
508 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
510 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
512 ; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
514 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
516 ; GFX900-NEXT: s_setpc_b64 s[30:31]
518 ; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
520 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
522 ; GFX906-NEXT: s_setpc_b64 s[30:31]
524 ; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
526 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
528 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
529 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
530 ; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
531 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
533 ; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
535 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
537 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
538 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
539 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
540 ; VI-NEXT: s_setpc_b64 s[30:31]
542 ; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
544 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545 ; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
546 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
548 ; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
550 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
552 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
553 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
554 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
555 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
556 %src0.ext = fpext half %src0 to float
557 %src1.ext = fpext half %src1 to float
558 %src2.ext = fpext half %src2 to float
; The abs is applied to the already-extended f32 value.
559 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
560 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; src0 goes through llvm.fabs.f32 and then fneg after the fpext. The gfx900,
; gfx906 and gfx1100 checks expect both to appear as a combined "-|v0|" source
; modifier on the mix instruction; the other targets are checked for -|v0| on
; v_mad_f32.
564 define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
565 ; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
567 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
569 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
571 ; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
573 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
575 ; GFX900-NEXT: s_setpc_b64 s[30:31]
577 ; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
579 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
581 ; GFX906-NEXT: s_setpc_b64 s[30:31]
583 ; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
585 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
587 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
588 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
589 ; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2
590 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
592 ; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
594 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
596 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
597 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
598 ; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
599 ; VI-NEXT: s_setpc_b64 s[30:31]
601 ; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
603 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
605 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
607 ; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
609 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
611 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
612 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
613 ; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
614 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
615 %src0.ext = fpext half %src0 to float
616 %src1.ext = fpext half %src1 to float
617 %src2.ext = fpext half %src2 to float
; abs first, then negate: the value fed to fmuladd is -|src0|.
618 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
619 %src0.ext.neg.abs = fneg float %src0.ext.abs
620 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
; Mixed-precision operands: src0 and src1 are half values fpext'ed to float,
; while src2 is already a float and is used directly. The gfx900, gfx906 and
; gfx1100 checks expect op_sel_hi:[1,1,0] on the mix instruction.
624 define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
625 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
627 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
629 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
631 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
633 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
637 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
639 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
641 ; GFX906-NEXT: s_setpc_b64 s[30:31]
643 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
645 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
647 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
648 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
649 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
651 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
653 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
655 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
656 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
657 ; VI-NEXT: s_setpc_b64 s[30:31]
659 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
661 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
662 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
663 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
665 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
667 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
669 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
670 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
671 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
672 %src0.ext = fpext half %src0 to float
673 %src1.ext = fpext half %src1 to float
674 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Mixed-precision operands with the float addend negated: src0 and src1 are
; half values fpext'ed to float, src2 is a float passed through fneg. Every
; check prefix expects the negation to appear as a "-v2" source modifier.
678 define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
679 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
681 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
683 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
685 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
687 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
689 ; GFX900-NEXT: s_setpc_b64 s[30:31]
691 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
693 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
695 ; GFX906-NEXT: s_setpc_b64 s[30:31]
697 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
699 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
701 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
702 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2
703 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
705 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
707 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
709 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
710 ; VI-NEXT: v_mad_f32 v0, v0, v1, -v2
711 ; VI-NEXT: s_setpc_b64 s[30:31]
713 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
715 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
717 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
719 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
721 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
723 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
724 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
725 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
726 %src0.ext = fpext half %src0 to float
727 %src1.ext = fpext half %src1 to float
728 %src2.neg = fneg float %src2
729 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
; Mixed-precision operands with abs on the float addend: src0 and src1 are half
; values fpext'ed to float, src2 is a float passed through llvm.fabs.f32. Every
; check prefix expects the abs to appear as a "|v2|" source modifier.
733 define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
734 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
736 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
738 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
740 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
742 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
744 ; GFX900-NEXT: s_setpc_b64 s[30:31]
746 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
748 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
750 ; GFX906-NEXT: s_setpc_b64 s[30:31]
752 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
754 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
756 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
757 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2|
758 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
760 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
762 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
764 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
765 ; VI-NEXT: v_mad_f32 v0, v0, v1, |v2|
766 ; VI-NEXT: s_setpc_b64 s[30:31]
768 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
770 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
772 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
774 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
776 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
778 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
779 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
780 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
781 %src0.ext = fpext half %src0 to float
782 %src1.ext = fpext half %src1 to float
783 %src2.abs = call float @llvm.fabs.f32(float %src2)
784 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
; fmuladd(fpext(half %src0), fpext(half %src1), fneg(fabs(float %src2))).
; The assertions below expect the fabs and the fneg to be combined into a
; single -|v2| source modifier on the multiply-add for every run line
; (v_mad_mix_f32 on gfx900, v_fma_mix_f32 on gfx906/gfx1100, v_mad_f32 on
; gfx9-generic, fiji and hawaii).
788 define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
789 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
791 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
793 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
795 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
797 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
799 ; GFX900-NEXT: s_setpc_b64 s[30:31]
801 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
803 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
805 ; GFX906-NEXT: s_setpc_b64 s[30:31]
807 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
809 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
811 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
812 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2|
813 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
815 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
817 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
819 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
820 ; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
821 ; VI-NEXT: s_setpc_b64 s[30:31]
823 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
825 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
827 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
829 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
831 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
833 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
834 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
835 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
836 %src0.ext = fpext half %src0 to float
837 %src1.ext = fpext half %src1 to float
838 %src2.abs = call float @llvm.fabs.f32(float %src2)
839 %src2.neg.abs = fneg float %src2.abs
840 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
844 ; TODO: Fold inline immediates. Need to be careful because it is an
845 ; f16 inline immediate that may be converted to f32, not an actual f32
; fmuladd of two f16 operands extended to f32, with the f32 constant 1.0 as
; the addend. In the current assertions the mix instructions never take 1.0
; as a direct operand: the runs without -global-isel first move it into an
; SGPR with s_mov_b32, and the -global-isel runs move it into v2 with
; v_mov_b32. The v_mad_f32 outputs (gfx9-generic, fiji, hawaii) use 1.0
; directly.
848 define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
849 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
850 ; SDAG-GFX1100: ; %bb.0:
851 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
852 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
853 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
854 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
855 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
857 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
858 ; SDAG-GFX900: ; %bb.0:
859 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
861 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
862 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
864 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
865 ; SDAG-GFX906: ; %bb.0:
866 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
868 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
869 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
871 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
873 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
874 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
875 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
876 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0
877 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
879 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
881 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
883 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
884 ; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0
885 ; VI-NEXT: s_setpc_b64 s[30:31]
887 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
889 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
891 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
893 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
894 ; GISEL-GFX1100: ; %bb.0:
895 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0
897 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
898 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
899 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
901 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
902 ; GISEL-GFX900: ; %bb.0:
903 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 1.0
905 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
906 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
908 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
909 ; GISEL-GFX906: ; %bb.0:
910 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0
912 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
913 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
915 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
917 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
918 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
919 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
920 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
921 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
922 %src0.ext = fpext half %src0 to float
923 %src1.ext = fpext half %src1 to float
924 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
; fmuladd of two f16 operands extended to f32, with an f32 constant addend
; that the assertions print as 0.15915494 (and as the literal 0x3e22f983 in
; the hawaii output). The mix-instruction targets are expected to move the
; constant into a register first (s_mov_b32 without -global-isel, v_mov_b32
; with it). gfx9-generic and fiji pass 0.15915494 straight to v_mad_f32;
; hawaii uses v_madak_f32, or v_mov_b32 + v_mac_f32 under -global-isel.
928 define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
929 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
930 ; SDAG-GFX1100: ; %bb.0:
931 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
932 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
933 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
934 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
935 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
937 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
938 ; SDAG-GFX900: ; %bb.0:
939 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
941 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
942 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
944 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
945 ; SDAG-GFX906: ; %bb.0:
946 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
948 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
949 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
951 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
953 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
955 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
956 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
957 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
959 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
961 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
963 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
964 ; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
965 ; VI-NEXT: s_setpc_b64 s[30:31]
967 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
969 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983
971 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
973 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
974 ; GISEL-GFX1100: ; %bb.0:
975 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494
977 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
978 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
979 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
981 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
982 ; GISEL-GFX900: ; %bb.0:
983 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494
985 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
986 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
988 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
989 ; GISEL-GFX906: ; %bb.0:
990 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494
992 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
993 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
995 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
997 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
999 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1000 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983
1001 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
1002 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1003 %src0.ext = fpext half %src0 to float
1004 %src1.ext = fpext half %src1 to float
1005 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
1009 ; Attempt to break inline immediate folding. If the operand is
1010 ; interpreted as f32, the inline immediate is really the f16 inline
1011 ; imm value converted to f32.
1012 ; fpext f16 1/2pi = 0x3e230000
1013 ; f32 1/2pi = 0x3e22f983
; The addend here is the f16 constant 0xH3118 extended to f32 by a constant
; fpext. Every assertion below expects the extended value 0x3e230000 to be
; emitted as a literal (through s_mov_b32, v_mov_b32 or the v_madak_f32
; constant operand); none of them uses the 0.15915494 operand spelling.
1015 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
1016 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1017 ; SDAG-GFX1100: ; %bb.0:
1018 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
1020 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1021 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
1022 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1024 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1025 ; SDAG-GFX900: ; %bb.0:
1026 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
1028 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1029 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1031 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1032 ; SDAG-GFX906: ; %bb.0:
1033 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
1035 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1036 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1038 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1039 ; SDAG-GFX9GEN: ; %bb.0:
1040 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1042 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1043 ; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
1044 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1046 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1048 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1050 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1051 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
1052 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1054 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1056 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
1058 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1060 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1061 ; GISEL-GFX1100: ; %bb.0:
1062 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000
1064 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1065 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1066 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1068 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1069 ; GISEL-GFX900: ; %bb.0:
1070 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000
1072 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1073 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1075 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1076 ; GISEL-GFX906: ; %bb.0:
1077 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1078 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000
1079 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1080 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1082 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1083 ; GISEL-GFX9GEN: ; %bb.0:
1084 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
1086 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1087 ; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000
1088 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
1089 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1091 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1092 ; GISEL-VI: ; %bb.0:
1093 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1095 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1096 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000
1097 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
1098 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1100 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1101 ; GISEL-CI: ; %bb.0:
1102 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
1104 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1105 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000
1106 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
1107 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1108 %src0.ext = fpext half %src0 to float
1109 %src1.ext = fpext half %src1 to float
1110 %src2 = fpext half 0xH3118 to float
1111 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; The addend here is the f16 constant 0xH003F (bit pattern 63) extended to
; f32 by a constant fpext. Every assertion below expects the extended value
; 0x367c0000 to be emitted as a literal (through s_mov_b32, v_mov_b32 or the
; v_madak_f32 constant operand).
1116 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
1117 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1118 ; SDAG-GFX1100: ; %bb.0:
1119 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000
1121 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1122 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
1123 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1125 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1126 ; SDAG-GFX900: ; %bb.0:
1127 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000
1129 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1130 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1132 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1133 ; SDAG-GFX906: ; %bb.0:
1134 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000
1136 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1137 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1139 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1140 ; SDAG-GFX9GEN: ; %bb.0:
1141 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1143 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1144 ; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
1145 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1147 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1149 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1151 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1152 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
1153 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1155 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1157 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
1159 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1161 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1162 ; GISEL-GFX1100: ; %bb.0:
1163 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1164 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000
1165 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1166 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1167 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1169 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1170 ; GISEL-GFX900: ; %bb.0:
1171 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000
1173 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1174 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1176 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1177 ; GISEL-GFX906: ; %bb.0:
1178 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000
1180 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1181 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1183 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1184 ; GISEL-GFX9GEN: ; %bb.0:
1185 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
1187 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1188 ; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000
1189 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
1190 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1192 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1193 ; GISEL-VI: ; %bb.0:
1194 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1195 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1196 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1197 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1198 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
1199 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1201 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1202 ; GISEL-CI: ; %bb.0:
1203 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
1205 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1206 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1207 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
1208 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1209 %src0.ext = fpext half %src0 to float
1210 %src1.ext = fpext half %src1 to float
1211 %src2 = fpext half 0xH003F to float
1212 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; <2 x half> variant: both vectors are extended to <2 x float> and combined
; by llvm.fmuladd.v2f32 with a splat of 1.0 as the addend. The
; mix-instruction targets are expected to emit two mix instructions, one of
; them with op_sel:[1,1,0], plus a v_mov_b32 that copies v2 into the other
; result register. The remaining targets convert the elements with
; v_cvt_f32_f16 (the SDWA form with src0_sel:WORD_1 on gfx9-generic and
; fiji) and use two v_mad_f32 with 1.0 as a direct operand.
1216 define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
1217 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1218 ; SDAG-GFX1100: ; %bb.0:
1219 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
1221 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1222 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1223 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1224 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1225 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1227 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1228 ; SDAG-GFX900: ; %bb.0:
1229 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
1231 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1232 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1233 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1234 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1236 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1237 ; SDAG-GFX906: ; %bb.0:
1238 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
1240 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1241 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1242 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1243 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1245 ; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
1246 ; SDAG-GFX9GEN: ; %bb.0:
1247 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1249 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1250 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
1251 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1252 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0
1253 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0
1254 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1256 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1258 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1260 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1261 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1262 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1263 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0
1264 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0
1265 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1267 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1269 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1270 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1271 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1272 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1273 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1274 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1275 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1276 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1277 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1278 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1279 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1280 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1282 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1283 ; GISEL-GFX1100: ; %bb.0:
1284 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 1.0
1286 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1287 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1288 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1289 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1290 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1292 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1293 ; GISEL-GFX900: ; %bb.0:
1294 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 1.0
1296 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1297 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1298 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1299 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1301 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1302 ; GISEL-GFX906: ; %bb.0:
1303 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 1.0
1305 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1306 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1307 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1308 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1310 ; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
1311 ; GISEL-GFX9GEN: ; %bb.0:
1312 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
1314 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1315 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
1316 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1317 ; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0
1318 ; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0
1319 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1321 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1322 ; GISEL-VI: ; %bb.0:
1323 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1325 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1326 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1327 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1328 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0
1329 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0
1330 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1332 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1333 ; GISEL-CI: ; %bb.0:
1334 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1336 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1337 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1338 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1339 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1340 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1341 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1342 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1343 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1344 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
1345 ret <2 x float> %result
1348 define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1349 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1350 ; SDAG-GFX1100: ; %bb.0:
1351 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
1353 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1354 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1355 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1356 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1357 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1359 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1360 ; SDAG-GFX900: ; %bb.0:
1361 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1362 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
1363 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1364 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1365 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1366 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1368 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1369 ; SDAG-GFX906: ; %bb.0:
1370 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
1372 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1373 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1374 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1375 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1377 ; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1378 ; SDAG-GFX9GEN: ; %bb.0:
1379 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1381 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1382 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
1383 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1384 ; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
1385 ; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
1386 ; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4
1387 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1389 ; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1391 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1393 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1394 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1395 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1396 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1397 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
1398 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4
1399 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1401 ; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1403 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1405 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1406 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1407 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1408 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1409 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1410 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1411 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1412 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1413 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
1414 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1415 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1417 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1418 ; GISEL-GFX1100: ; %bb.0:
1419 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0x3e230000
1421 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1422 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1423 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1424 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1425 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1427 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1428 ; GISEL-GFX900: ; %bb.0:
1429 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0x3e230000
1431 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1432 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1433 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1434 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1436 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1437 ; GISEL-GFX906: ; %bb.0:
1438 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0x3e230000
1440 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1441 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1442 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1443 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1445 ; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1446 ; GISEL-GFX9GEN: ; %bb.0:
1447 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1448 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
1449 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1450 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
1451 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1452 ; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
1453 ; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
1454 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4
1455 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1457 ; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1458 ; GISEL-VI: ; %bb.0:
1459 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1461 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1462 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1463 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1464 ; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1465 ; GISEL-VI-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
1466 ; GISEL-VI-NEXT: v_mac_f32_e32 v1, v3, v4
1467 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1469 ; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1470 ; GISEL-CI: ; %bb.0:
1471 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1473 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1474 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1475 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1476 ; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1477 ; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
1478 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1479 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1480 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1481 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1482 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
1483 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
1484 ret <2 x float> %result
; Both <2 x half> arguments are extended to <2 x float> and passed to
; llvm.fmuladd.v2f32 with a constant f32 splat (0x3FC45F3060000000) as the addend.
; The checks show that constant either as 0.15915494 or as the literal 0x3e22f983,
; and expect one multiply-add per vector element on every target.
1487 define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1488 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1489 ; SDAG-GFX1100: ; %bb.0:
1490 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
1492 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1493 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1494 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1495 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1496 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1498 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1499 ; SDAG-GFX900: ; %bb.0:
1500 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1501 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
1502 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1503 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1504 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1505 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1507 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1508 ; SDAG-GFX906: ; %bb.0:
1509 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
1511 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1512 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1513 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1514 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1516 ; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1517 ; SDAG-GFX9GEN: ; %bb.0:
1518 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1520 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1521 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
1522 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1523 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
1524 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
1525 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1527 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1529 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1531 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1532 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1533 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1534 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
1535 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
1536 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1538 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1540 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1542 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1543 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1544 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1545 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1546 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1547 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1548 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1549 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
1550 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
1551 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1552 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1554 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1555 ; GISEL-GFX1100: ; %bb.0:
1556 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1557 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0.15915494
1558 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1559 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1560 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1561 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1562 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1564 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1565 ; GISEL-GFX900: ; %bb.0:
1566 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0.15915494
1568 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1569 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1570 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1571 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1573 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1574 ; GISEL-GFX906: ; %bb.0:
1575 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0.15915494
1577 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1578 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1579 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1580 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1582 ; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1583 ; GISEL-GFX9GEN: ; %bb.0:
1584 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
1586 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1587 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
1588 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1589 ; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
1590 ; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
1591 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1593 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1594 ; GISEL-VI: ; %bb.0:
1595 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1597 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1598 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1599 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1600 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
1601 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
1602 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1604 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1605 ; GISEL-CI: ; %bb.0:
1606 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1608 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1609 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1610 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1611 ; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
1612 ; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
1613 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1614 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1615 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1616 %src1.ext = fpext <2 x half> %src1 to <2 x float>
; NOTE(review): %src2 is never used below; the fmuladd takes the f32 constant
; directly. Looks like a leftover from the half-constant variant of this test - confirm.
1617 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
1618 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
1619 ret <2 x float> %result
; Extracts element 1 of each <2 x half> source, extends it to float, and feeds the
; three values to llvm.fmuladd.f32. The result then goes through maxnum(x, 0.0) and
; minnum(x, 1.0). Every check expects that min/max pair to end up as the "clamp"
; modifier on a single multiply-add instruction.
1622 define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1623 ; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1625 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1627 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1631 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1633 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1635 ; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1637 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1639 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1641 ; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1643 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1645 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1646 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1647 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1648 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1650 ; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1652 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1654 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1655 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1656 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1657 ; VI-NEXT: s_setpc_b64 s[30:31]
1659 ; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1661 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
1663 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1665 ; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1666 ; GISEL-CI: ; %bb.0:
1667 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1
1669 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3
1670 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5
1671 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1672 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1673 %src0.hi = extractelement <2 x half> %src0, i32 1
1674 %src1.hi = extractelement <2 x half> %src1, i32 1
1675 %src2.hi = extractelement <2 x half> %src2, i32 1
1676 %src0.ext = fpext half %src0.hi to float
1677 %src1.ext = fpext half %src1.hi to float
1678 %src2.ext = fpext half %src2.hi to float
1679 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1680 %max = call float @llvm.maxnum.f32(float %result, float 0.0)
1681 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
; Negative test: llvm.fmuladd.f32 on three plain float arguments with no half
; conversions. Every check expects an ordinary v_fma_f32 or v_mad_f32; no
; *_mix_f32 instruction appears in any of them.
1685 define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
1686 ; GFX1100-LABEL: no_mix_simple:
1688 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2
1690 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1692 ; GFX900-LABEL: no_mix_simple:
1694 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695 ; GFX900-NEXT: v_mad_f32 v0, v0, v1, v2
1696 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1698 ; GFX906-LABEL: no_mix_simple:
1700 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1701 ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
1702 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1704 ; GFX9GEN-LABEL: no_mix_simple:
1706 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
1708 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1710 ; VI-LABEL: no_mix_simple:
1712 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
1714 ; VI-NEXT: s_setpc_b64 s[30:31]
1716 ; CI-LABEL: no_mix_simple:
1718 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719 ; CI-NEXT: v_mad_f32 v0, v0, v1, v2
1720 ; CI-NEXT: s_setpc_b64 s[30:31]
1721 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
; Negative test: llvm.fmuladd.f32 on plain float arguments where the first operand
; goes through llvm.fabs.f32. Every check expects the fabs as a |v0| source
; modifier on an ordinary v_fma_f32 or v_mad_f32, with no *_mix_f32 instruction.
1725 define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
1726 ; GFX1100-LABEL: no_mix_simple_fabs:
1728 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729 ; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2
1730 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1732 ; GFX900-LABEL: no_mix_simple_fabs:
1734 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735 ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
1736 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1738 ; GFX906-LABEL: no_mix_simple_fabs:
1740 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741 ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
1742 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1744 ; GFX9GEN-LABEL: no_mix_simple_fabs:
1746 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747 ; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
1748 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1750 ; VI-LABEL: no_mix_simple_fabs:
1752 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1754 ; VI-NEXT: s_setpc_b64 s[30:31]
1756 ; CI-LABEL: no_mix_simple_fabs:
1758 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1760 ; CI-NEXT: s_setpc_b64 s[30:31]
1761 %src0.fabs = call float @llvm.fabs.f32(float %src0)
1762 %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
1766 ; FIXME(DAG): Should be able to select in this case.
1767 ; All sources are converted from f16, so it doesn't matter that
1768 ; v_mad_mix_f32 flushes denormals.
; llvm.fmuladd.f32 on three half arguments extended to float, compiled with
; attribute group #1 (defined outside this chunk; presumably the f32-denormals
; mode the name refers to - confirm). The checks expect v_fma_mix_f32 on GFX1100
; and GFX906 only. GFX900 is expected to use conversions plus v_fma_f32 rather
; than v_mad_mix_f32, and VI a separate v_mul_f32 / v_add_f32 pair.
1770 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
1771 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1773 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1775 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1777 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1779 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1781 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1782 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1783 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1784 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1786 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1788 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1790 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1792 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1794 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1796 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1797 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
1798 ; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
1799 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1801 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1803 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1805 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1806 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1807 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1808 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1809 ; VI-NEXT: s_setpc_b64 s[30:31]
1811 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1813 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1814 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1815 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1817 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1818 ; GISEL-CI: ; %bb.0:
1819 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1821 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1822 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1823 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1824 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1825 %src0.ext = fpext half %src0 to float
1826 %src1.ext = fpext half %src1 to float
1827 %src2.ext = fpext half %src2 to float
1828 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; llvm.fmuladd.f32 where the two multiplicands are half arguments extended to
; float and the addend is a native float argument, compiled with attribute group
; #1 (defined outside this chunk; presumably the f32-denormals mode the name
; refers to - confirm). The checks expect v_fma_mix_f32 on GFX1100 and GFX906
; only. GFX900 is expected to use conversions plus v_fma_f32 rather than
; v_mad_mix_f32, and VI a separate v_mul_f32 / v_add_f32 pair.
1832 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
1833 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1835 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1837 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1839 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1841 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1842 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1843 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1844 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1845 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1847 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1849 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1851 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1853 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1855 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1856 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1857 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1858 ; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
1859 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1861 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1863 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1865 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1866 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1867 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1868 ; VI-NEXT: s_setpc_b64 s[30:31]
1870 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1872 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1874 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1876 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1877 ; GISEL-CI: ; %bb.0:
1878 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1880 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1881 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1882 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1883 %src0.ext = fpext half %src0 to float
1884 %src1.ext = fpext half %src1 to float
1885 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Three half arguments are extended to float and combined with a separate fmul
; and fadd that carry no fast-math flags, under attribute group #1 (defined
; outside this chunk). Every check expects the pair to stay unfused as
; v_mul_f32 followed by v_add_f32.
1889 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
1890 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1892 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1893 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1894 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1895 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2
1896 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1897 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1898 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1899 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1901 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1903 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1904 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1905 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1906 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1907 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1908 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1909 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1911 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1913 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1915 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1916 ; GFX906-NEXT: v_cvt_f32_f16_e32 v2, v2
1917 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1918 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1919 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1921 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1923 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1925 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1926 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
1927 ; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
1928 ; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
1929 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
1931 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1933 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1934 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1935 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1936 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1937 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1938 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1939 ; VI-NEXT: s_setpc_b64 s[30:31]
1941 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1943 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1945 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
1946 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1948 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1949 ; GISEL-CI: ; %bb.0:
1950 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1952 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1953 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1954 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1955 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
1956 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1957 %src0.ext = fpext half %src0 to float
1958 %src1.ext = fpext half %src1 to float
1959 %src2.ext = fpext half %src2 to float
1960 %mul = fmul float %src0.ext, %src1.ext
1961 %result = fadd float %mul, %src2.ext
; Two half arguments are extended to float and multiplied, then a native float
; argument is added. The fmul and fadd carry no fast-math flags and the function
; uses attribute group #1 (defined outside this chunk). Every check expects the
; pair to stay unfused as v_mul_f32 followed by v_add_f32.
1965 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
1966 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1968 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1970 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1971 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1972 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1973 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1974 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1976 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1978 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1979 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1980 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1981 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1982 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1983 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1985 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1987 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1988 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1989 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1990 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1991 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1992 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1994 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1996 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
1998 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
1999 ; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
2000 ; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
2001 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2003 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2005 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
2007 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2008 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
2009 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
2010 ; VI-NEXT: s_setpc_b64 s[30:31]
2012 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2014 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2016 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
2017 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2019 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2020 ; GISEL-CI: ; %bb.0:
2021 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2022 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2023 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2024 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2025 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
2026 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2027 %src0.ext = fpext half %src0 to float
2028 %src1.ext = fpext half %src1 to float
2029 %mul = fmul float %src0.ext, %src1.ext
2030 %result = fadd float %mul, %src2
; Three half arguments are extended to float and combined with a separate
; "fmul contract" / "fadd contract" pair, under attribute group #0 (defined
; outside this chunk). Every check expects the pair to be fused into one
; instruction: v_fma_mix_f32 / v_mad_mix_f32 on GFX1100, GFX900 and GFX906, and
; v_mac_f32 or v_mad_f32 on the remaining targets.
2034 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
2035 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2037 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2038 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2039 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2041 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2043 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2045 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2047 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2049 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2051 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2053 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2055 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2056 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
2057 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2058 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2059 ; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2060 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2062 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2064 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
2066 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2067 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2068 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
2069 ; VI-NEXT: s_setpc_b64 s[30:31]
2071 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2073 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2074 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2075 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2077 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2078 ; GISEL-CI: ; %bb.0:
2079 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2080 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2081 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2082 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2083 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2084 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2085 %src0.ext = fpext half %src0 to float
2086 %src1.ext = fpext half %src1 to float
2087 %src2.ext = fpext half %src2 to float
2088 %mul = fmul contract float %src0.ext, %src1.ext
2089 %result = fadd contract float %mul, %src2.ext
; Two half arguments are extended to float and multiplied with "fmul contract",
; then a native float argument is added with "fadd contract", under attribute
; group #0 (defined outside this chunk). Every check expects the pair to be fused
; into one instruction: v_fma_mix_f32 / v_mad_mix_f32 on GFX1100, GFX900 and
; GFX906, and v_mad_f32 on the remaining targets.
2093 define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
2094 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2096 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2098 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2100 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2103 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2104 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2106 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2108 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2110 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2112 ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2114 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2115 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
2116 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2117 ; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
2118 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2120 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2122 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
2124 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2125 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
2126 ; VI-NEXT: s_setpc_b64 s[30:31]
2128 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2130 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2131 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2132 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2134 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2135 ; GISEL-CI: ; %bb.0:
2136 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2138 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2139 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2140 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2141 %src0.ext = fpext half %src0 to float
2142 %src1.ext = fpext half %src1 to float
2143 %mul = fmul contract float %src0.ext, %src1.ext
2144 %result = fadd contract float %mul, %src2
; The i32 argument is bitcast to <2 x half> and element 0 is negated as a half
; *before* being extended to float; the result feeds llvm.fmuladd.f32 together
; with two extended half arguments. The checks expect the fneg as a -v0 source
; modifier: on the mix instruction for GFX1100/GFX900/GFX906, on v_mad_f32 for
; the remaining SDAG targets, and on v_cvt_f32_f16_e64 for the remaining GISel
; targets.
2148 define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2149 ; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2151 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2152 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2153 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2155 ; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2157 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2158 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2159 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2161 ; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2163 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2164 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2165 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2167 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2168 ; SDAG-GFX9GEN: ; %bb.0:
2169 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2170 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
2171 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2172 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
2173 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
2174 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2176 ; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2178 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
2180 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2181 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2182 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2183 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2185 ; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2187 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2188 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2189 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
2190 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2192 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2193 ; GISEL-GFX9GEN: ; %bb.0:
2194 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
2196 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2197 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2198 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2199 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2201 ; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2202 ; GISEL-VI: ; %bb.0:
2203 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
2205 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2206 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2207 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2208 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2210 ; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2211 ; GISEL-CI: ; %bb.0:
2212 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2213 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
2214 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2215 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2216 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2217 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2218 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2219 %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
2220 %src0.neg = fneg half %src0
2221 %src0.ext = fpext half %src0.neg to float
2222 %src1.ext = fpext half %src1 to float
2223 %src2.ext = fpext half %src2 to float
2224 ; %src0.ext.neg = fneg float %src0.ext
2225 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2229 ; Make sure we don't fold pre-cvt fneg if we already have a fabs
; src0 is element 1 (the high half) of the i32 argument. The IR negates it as
; f16, extends it to f32, and only then applies llvm.fabs.f32, so the operand
; of the fmuladd is |fpext(-src0)|.
; In the check blocks that use v_fma_mix_f32 / v_mad_mix_f32 the fneg stays a
; separate v_xor with 0x8000 and only the fabs appears as the |v0| modifier.
2231 define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2232 ; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2234 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2235 ; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2236 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2237 ; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0
2238 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2239 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2241 ; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2243 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GFX900-NEXT: s_mov_b32 s4, 0x8000
2245 ; GFX900-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2246 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2247 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2249 ; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2251 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252 ; GFX906-NEXT: s_mov_b32 s4, 0x8000
2253 ; GFX906-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2254 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2255 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2257 ; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2259 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2261 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2262 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
2263 ; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
2264 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2266 ; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2268 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2269 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2270 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2271 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2272 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
2273 ; VI-NEXT: s_setpc_b64 s[30:31]
2275 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2277 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2279 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2280 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2281 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2283 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2284 ; GISEL-CI: ; %bb.0:
2285 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2287 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2288 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2289 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2290 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
2291 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2292 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2293 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
2294 %src0.neg = fneg half %src0
2295 %src0.ext = fpext half %src0.neg to float
2296 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
2297 %src1.ext = fpext half %src1 to float
2298 %src2.ext = fpext half %src2 to float
2299 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; src0 is element 1 (the high half) of the i32 argument with llvm.fabs.f16
; applied *before* the fpext to f32; the result feeds llvm.fmuladd.f32 together
; with the extended src1/src2.
; The check blocks that use v_fma_mix_f32 / v_mad_mix_f32 expect the fabs as a
; |v0| modifier with op_sel [1,0,0] selecting the high half. The other check
; blocks expect the |v0| modifier on the v_cvt_f32_f16 of src0 instead.
2303 define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2304 ; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2306 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2307 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2308 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2310 ; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2312 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2314 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2316 ; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2318 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2319 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2320 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2322 ; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2324 ; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325 ; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2326 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2327 ; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2328 ; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2329 ; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2331 ; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2333 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2335 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2336 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2337 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
2338 ; VI-NEXT: s_setpc_b64 s[30:31]
2340 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2342 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2343 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2344 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2345 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2346 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2348 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2349 ; GISEL-CI: ; %bb.0:
2350 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2352 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0|
2353 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2354 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2355 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2356 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2357 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2358 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
2359 %src0.abs = call half @llvm.fabs.f16(half %src0)
2360 %src0.ext = fpext half %src0.abs to float
2361 %src1.ext = fpext half %src1 to float
2362 %src2.ext = fpext half %src2 to float
2363 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Here the fneg is applied to the whole <2 x half> vector *before* element 1
; (the high half) is extracted and extended to f32 for llvm.fmuladd.f32.
; The check blocks that use v_fma_mix_f32 / v_mad_mix_f32 expect a "-v0"
; modifier with op_sel [1,0,0]. The SelectionDAG fallback blocks expect the
; negation as a modifier on v_mad_f32 or v_cvt_f32_f16, while the GlobalISel
; fallback blocks expect a v_xor with 0x80008000 on the packed register.
2367 define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2368 ; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2370 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2371 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2372 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2374 ; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2376 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2378 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2380 ; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2382 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2383 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2384 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2386 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2387 ; SDAG-GFX9GEN: ; %bb.0:
2388 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2389 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2390 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2391 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
2392 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
2393 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2395 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2397 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2399 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2400 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2401 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2402 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2404 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2406 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2408 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2409 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2410 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2412 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2413 ; GISEL-GFX9GEN: ; %bb.0:
2414 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415 ; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2416 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2417 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2418 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2419 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2420 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2422 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2423 ; GISEL-VI: ; %bb.0:
2424 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425 ; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2426 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2427 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2428 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2429 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2430 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2432 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2433 ; GISEL-CI: ; %bb.0:
2434 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435 ; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2436 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2437 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2438 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2439 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2440 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2441 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2442 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2443 %fneg = fneg <2 x half> %src0.arg.bc
2444 %src0 = extractelement <2 x half> %fneg, i32 1
2445 %src0.ext = fpext half %src0 to float
2446 %src1.ext = fpext half %src1 to float
2447 %src2.ext = fpext half %src2 to float
2448 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Here llvm.fabs.v2f16 is applied to the whole <2 x half> vector *before*
; element 1 (the high half) is extracted and extended to f32 for
; llvm.fmuladd.f32.
; The check blocks that use v_fma_mix_f32 / v_mad_mix_f32 expect a |v0|
; modifier with op_sel [1,0,0]. The SelectionDAG fallback blocks expect |v0| on
; the v_cvt_f32_f16 of src0, while the GlobalISel fallback blocks expect a
; v_and with 0x7fff7fff on the packed register.
2452 define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2453 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2455 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2457 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2459 ; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2461 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2462 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2463 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2465 ; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2467 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2469 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2471 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2472 ; SDAG-GFX9GEN: ; %bb.0:
2473 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2475 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2476 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2477 ; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2478 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2480 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2482 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2483 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2484 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2485 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2486 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2487 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2489 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2491 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2492 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2493 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2494 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2495 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2497 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2498 ; GISEL-GFX9GEN: ; %bb.0:
2499 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2500 ; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2501 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2502 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2503 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2504 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2505 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2507 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2508 ; GISEL-VI: ; %bb.0:
2509 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2510 ; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2511 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2512 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2513 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2514 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2515 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2517 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2518 ; GISEL-CI: ; %bb.0:
2519 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2521 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2522 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2523 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2524 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2525 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2526 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2527 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2528 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2529 %src0 = extractelement <2 x half> %fabs, i32 1
2530 %src0.ext = fpext half %src0 to float
2531 %src1.ext = fpext half %src1 to float
2532 %src2.ext = fpext half %src2 to float
2533 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Here the whole <2 x half> vector goes through llvm.fabs.v2f16 and then fneg
; *before* element 1 (the high half) is extracted and extended to f32 for
; llvm.fmuladd.f32, i.e. src0 is -|hi(src0.arg)|.
; The check blocks that use v_fma_mix_f32 / v_mad_mix_f32 expect a combined
; "-|v0|" modifier with op_sel [1,0,0]. The SelectionDAG fallback blocks expect
; the abs/neg as modifiers on v_cvt_f32_f16 and v_mad_f32, while the GlobalISel
; fallback blocks expect a v_or with 0x80008000 on the packed register.
2537 define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2538 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2540 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2542 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2544 ; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2546 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2547 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2548 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2550 ; GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2552 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2554 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2556 ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2557 ; SDAG-GFX9GEN: ; %bb.0:
2558 ; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2559 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2560 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2561 ; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
2562 ; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
2563 ; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2565 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2567 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2568 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2569 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2570 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2571 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2572 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2574 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2576 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2577 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2578 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
2579 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2580 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2582 ; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2583 ; GISEL-GFX9GEN: ; %bb.0:
2584 ; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2585 ; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2586 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2587 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
2588 ; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
2589 ; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
2590 ; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
2592 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2593 ; GISEL-VI: ; %bb.0:
2594 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2595 ; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2596 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2597 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2598 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2599 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2600 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2602 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2603 ; GISEL-CI: ; %bb.0:
2604 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605 ; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2606 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2607 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2608 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2609 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2610 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2611 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2612 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2613 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2614 %fneg.fabs = fneg <2 x half> %fabs
2615 %src0 = extractelement <2 x half> %fneg.fabs, i32 1
2616 %src0.ext = fpext half %src0 to float
2617 %src1.ext = fpext half %src1 to float
2618 %src2.ext = fpext half %src2 to float
2619 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Intrinsic declarations; every one carries attribute group #2.
; The fabs variants and llvm.fmuladd.f32 are called by the functions directly
; above. NOTE(review): minnum, maxnum and the v2f32 fmuladd have no caller in
; this part of the file - presumably earlier tests use them; confirm before
; removing any of these.
2623 declare half @llvm.fabs.f16(half) #2
2624 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
2625 declare float @llvm.fabs.f32(float) #2
2626 declare float @llvm.minnum.f32(float, float) #2
2627 declare float @llvm.maxnum.f32(float, float) #2
2628 declare float @llvm.fmuladd.f32(float, float, float) #2
2629 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
; Attribute groups.
;   #0 - nounwind, f32 denormal mode "preserve-sign,preserve-sign".
;   #1 - nounwind, f32 denormal mode "ieee,ieee"; otherwise the same as #0.
;   #2 - nounwind readnone speculatable.
2631 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2632 attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
2633 attributes #2 = { nounwind readnone speculatable }