1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
5 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
6 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
8 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
9 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
10 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
11 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
12 ; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
; Baseline case: all three fmuladd operands are scalar half arguments that are
; widened to float with fpext. The checks expect a single v_mad_mix_f32 on
; gfx900 and a single v_fma_mix_f32 on gfx906/gfx1100, each with
; op_sel_hi set for all three sources. The fiji checks and the GlobalISel
; hawaii checks expect three v_cvt_f32_f16 conversions feeding v_mac_f32; the
; SelectionDAG hawaii checks expect a bare v_mad_f32.
14 define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
15 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
17 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
19 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
21 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
23 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
25 ; GFX900-NEXT: s_setpc_b64 s[30:31]
27 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
29 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
31 ; GFX906-NEXT: s_setpc_b64 s[30:31]
33 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
35 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
37 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
38 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
39 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
40 ; VI-NEXT: s_setpc_b64 s[30:31]
42 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
44 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
46 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
48 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
50 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
52 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
53 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
54 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
55 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
56 %src0.ext = fpext half %src0 to float
57 %src1.ext = fpext half %src1 to float
58 %src2.ext = fpext half %src2 to float
59 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Each fmuladd operand is the high 16 bits of an i32 argument: the value is
; shifted right by 16, truncated to i16, bitcast to half and then widened with
; fpext. The mix-instruction checks (gfx900, gfx906, gfx1100) expect the shift
; to disappear into op_sel set for all three sources. The fiji checks expect
; SDWA conversions selecting WORD_1, and the hawaii checks expect explicit
; v_lshrrev_b32 shifts before the conversions.
63 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
64 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
66 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
68 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
70 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
72 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
74 ; GFX900-NEXT: s_setpc_b64 s[30:31]
76 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
78 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
80 ; GFX906-NEXT: s_setpc_b64 s[30:31]
82 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
84 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
86 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
87 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
88 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
89 ; VI-NEXT: s_setpc_b64 s[30:31]
91 ; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
93 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
95 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
96 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
97 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v0
98 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
99 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v2
100 ; CI-NEXT: v_mac_f32_e32 v0, v3, v1
101 ; CI-NEXT: s_setpc_b64 s[30:31]
102 %src0.hi = lshr i32 %src0, 16
103 %src1.hi = lshr i32 %src1, 16
104 %src2.hi = lshr i32 %src2, 16
105 %src0.i16 = trunc i32 %src0.hi to i16
106 %src1.i16 = trunc i32 %src1.hi to i16
107 %src2.i16 = trunc i32 %src2.hi to i16
108 %src0.fp16 = bitcast i16 %src0.i16 to half
109 %src1.fp16 = bitcast i16 %src1.i16 to half
110 %src2.fp16 = bitcast i16 %src2.i16 to half
111 %src0.ext = fpext half %src0.fp16 to float
112 %src1.ext = fpext half %src1.fp16 to float
113 %src2.ext = fpext half %src2.fp16 to float
114 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Same high-half selection as the integer variant, but expressed as
; extractelement of index 1 from each <2 x half> argument before the fpext.
; The mix-instruction checks expect op_sel set for all three sources; the fiji
; checks expect SDWA conversions selecting WORD_1. On hawaii the checks read
; the high elements from v1, v3 and v5.
118 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
119 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
121 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
123 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
125 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
127 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
129 ; GFX900-NEXT: s_setpc_b64 s[30:31]
131 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
133 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
135 ; GFX906-NEXT: s_setpc_b64 s[30:31]
137 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
139 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
141 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
142 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
143 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
144 ; VI-NEXT: s_setpc_b64 s[30:31]
146 ; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
148 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5
150 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
152 ; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
154 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
156 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
157 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5
158 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2
159 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
160 %src0.hi = extractelement <2 x half> %src0, i32 1
161 %src1.hi = extractelement <2 x half> %src1, i32 1
162 %src2.hi = extractelement <2 x half> %src2, i32 1
163 %src0.ext = fpext half %src0.hi to float
164 %src1.ext = fpext half %src1.hi to float
165 %src2.ext = fpext half %src2.hi to float
166 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Vector case: three <2 x half> arguments are widened to <2 x float> and fed to
; llvm.fmuladd.v2f32. On the mix-capable targets the checks expect two mix
; instructions, one reading the low halves (op_sel_hi only) and one reading the
; high halves (op_sel set for all sources), plus a v_mov_b32 to place the
; result. SelectionDAG and GlobalISel emit the two halves in opposite order on
; gfx900/gfx906, hence the separate SDAG-/GISEL- check blocks. The fiji and
; hawaii checks expect explicit conversions followed by two v_mac_f32.
170 define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
171 ; GFX1100-LABEL: v_mad_mix_v2f32:
173 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
175 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
176 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
177 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
178 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
180 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
181 ; SDAG-GFX900: ; %bb.0:
182 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
184 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
185 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
186 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
188 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
189 ; SDAG-GFX906: ; %bb.0:
190 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
192 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
193 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
194 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
196 ; SDAG-VI-LABEL: v_mad_mix_v2f32:
198 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
200 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0
201 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
202 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1
203 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
204 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
205 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5
206 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6
207 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
209 ; SDAG-CI-LABEL: v_mad_mix_v2f32:
211 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
213 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3
214 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
215 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
216 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5
217 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6
218 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
219 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0
220 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
221 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
222 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
223 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6
224 ; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5
225 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3
226 ; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
227 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
229 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
230 ; GISEL-GFX900: ; %bb.0:
231 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
233 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
234 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
235 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
237 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
238 ; GISEL-GFX906: ; %bb.0:
239 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
241 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
242 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
243 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
245 ; GISEL-VI-LABEL: v_mad_mix_v2f32:
247 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
249 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
250 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1
251 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
252 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
253 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
254 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5
255 ; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6
256 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
258 ; GISEL-CI-LABEL: v_mad_mix_v2f32:
260 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0
262 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1
263 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
264 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
265 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
266 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5
267 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2
268 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3
269 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
270 %src0.ext = fpext <2 x half> %src0 to <2 x float>
271 %src1.ext = fpext <2 x half> %src1 to <2 x float>
272 %src2.ext = fpext <2 x half> %src2 to <2 x float>
273 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
274 ret <2 x float> %result
; Vector case with per-operand element shuffles applied before the fpext:
; src0 is swapped (mask <1, 0>), src1 is left in order (mask <0, 1>) and src2
; broadcasts its high element (mask <1, 1>). The mix-instruction checks expect
; the shuffles to be expressed purely through op_sel, namely [1,0,1] for result
; element 0 and [0,1,1] for result element 1. The fiji and hawaii checks expect
; explicit conversions followed by v_mad_f32 and v_mac_f32.
277 define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
278 ; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
280 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
282 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
283 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
284 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
285 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
287 ; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
289 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
291 ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
292 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
293 ; GFX900-NEXT: s_setpc_b64 s[30:31]
295 ; GFX906-LABEL: v_mad_mix_v2f32_shuffle:
297 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
299 ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
300 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
301 ; GFX906-NEXT: s_setpc_b64 s[30:31]
303 ; VI-LABEL: v_mad_mix_v2f32_shuffle:
305 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
307 ; VI-NEXT: v_cvt_f32_f16_e32 v4, v0
308 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v1
309 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
310 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
311 ; VI-NEXT: v_mad_f32 v0, v3, v0, v2
312 ; VI-NEXT: v_mac_f32_e32 v2, v4, v1
313 ; VI-NEXT: v_mov_b32_e32 v1, v2
314 ; VI-NEXT: s_setpc_b64 s[30:31]
316 ; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
318 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
320 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5
321 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
322 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1
323 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
324 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
325 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4
326 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
327 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5
328 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
329 ; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1
330 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3
331 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
333 ; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
335 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
337 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
338 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
339 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5
340 ; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
341 ; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4
342 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
343 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
344 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
345 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
346 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
347 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
348 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
349 ; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
350 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
351 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
352 %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
353 %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
354 %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
355 %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
356 %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
357 %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
358 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
359 ret <2 x float> %result
; Source-modifier case: src0 is widened from half and then negated with fneg
; before the fmuladd. The mix-instruction checks expect the negation to be
; folded into the instruction as a -v0 source modifier. On fiji and hawaii the
; SelectionDAG checks put the negation on v_mad_f32, while the GlobalISel checks
; put it on the v_cvt_f32_f16_e64 conversion of src0.
362 define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
363 ; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
365 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
367 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
369 ; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
371 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
373 ; GFX900-NEXT: s_setpc_b64 s[30:31]
375 ; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
377 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
379 ; GFX906-NEXT: s_setpc_b64 s[30:31]
381 ; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
383 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
385 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
386 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
387 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
388 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
390 ; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
392 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
394 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
396 ; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
398 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
400 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
401 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
402 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
403 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
405 ; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
407 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
409 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
410 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
411 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
412 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
413 %src0.ext = fpext half %src0 to float
414 %src1.ext = fpext half %src1 to float
415 %src2.ext = fpext half %src2 to float
416 %src0.ext.neg = fneg float %src0.ext
417 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
; Source-modifier case: llvm.fabs.f32 is applied to the widened src0 before the
; fmuladd. The mix-instruction checks expect the absolute value to be folded in
; as a |v0| source modifier; the fiji and hawaii checks expect the same
; modifier on v_mad_f32.
421 define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
422 ; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
424 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
426 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
428 ; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
430 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
432 ; GFX900-NEXT: s_setpc_b64 s[30:31]
434 ; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
436 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
438 ; GFX906-NEXT: s_setpc_b64 s[30:31]
440 ; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
442 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
444 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
445 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
446 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
447 ; VI-NEXT: s_setpc_b64 s[30:31]
449 ; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
451 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452 ; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
453 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
455 ; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
457 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
458 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
459 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
460 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
461 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
462 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
463 %src0.ext = fpext half %src0 to float
464 %src1.ext = fpext half %src1 to float
465 %src2.ext = fpext half %src2 to float
466 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
467 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; Source-modifier case combining both modifiers: the widened src0 goes through
; llvm.fabs.f32 and then fneg before the fmuladd. The mix-instruction checks
; expect a single -|v0| source modifier; the fiji and hawaii checks expect the
; same modifier on v_mad_f32.
471 define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
472 ; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
474 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
476 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
478 ; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
480 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
482 ; GFX900-NEXT: s_setpc_b64 s[30:31]
484 ; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
486 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
488 ; GFX906-NEXT: s_setpc_b64 s[30:31]
490 ; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
492 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
494 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
495 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
496 ; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
497 ; VI-NEXT: s_setpc_b64 s[30:31]
499 ; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
501 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502 ; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
503 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
505 ; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
507 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
509 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
510 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
511 ; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
512 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
513 %src0.ext = fpext half %src0 to float
514 %src1.ext = fpext half %src1 to float
515 %src2.ext = fpext half %src2 to float
516 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
517 %src0.ext.neg.abs = fneg float %src0.ext.abs
518 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
; Mixed-precision case: src0 and src1 are halves widened with fpext, while the
; addend src2 is already a float argument. The mix-instruction checks expect
; op_sel_hi of [1,1,0], i.e. only the first two sources are marked. The fiji
; checks and the GlobalISel hawaii checks expect conversions for src0 and src1
; only, followed by v_mad_f32.
522 define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
523 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
525 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
527 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
529 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
531 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
533 ; GFX900-NEXT: s_setpc_b64 s[30:31]
535 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
537 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
539 ; GFX906-NEXT: s_setpc_b64 s[30:31]
541 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
543 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
545 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
546 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
547 ; VI-NEXT: s_setpc_b64 s[30:31]
549 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
551 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
553 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
555 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
557 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
559 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
560 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
561 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
562 %src0.ext = fpext half %src0 to float
563 %src1.ext = fpext half %src1 to float
564 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Mixed-precision case with a negated float addend: src2 is passed through fneg
; before the fmuladd. The checks on every target expect the negation to be
; folded as a -v2 source modifier on the multiply-add instruction.
568 define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
569 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
571 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
573 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
575 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
577 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
581 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
583 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
585 ; GFX906-NEXT: s_setpc_b64 s[30:31]
587 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
589 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
591 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
592 ; VI-NEXT: v_mad_f32 v0, v0, v1, -v2
593 ; VI-NEXT: s_setpc_b64 s[30:31]
595 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
597 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
599 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
601 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
603 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
605 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
606 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
607 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
608 %src0.ext = fpext half %src0 to float
609 %src1.ext = fpext half %src1 to float
610 %src2.neg = fneg float %src2
611 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
; Mixed-precision case with an absolute-value float addend: src2 is passed
; through llvm.fabs.f32 before the fmuladd. The checks on every target expect
; the operation to be folded as a |v2| source modifier on the multiply-add
; instruction.
615 define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
616 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
618 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
620 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
622 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
624 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
626 ; GFX900-NEXT: s_setpc_b64 s[30:31]
628 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
630 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
632 ; GFX906-NEXT: s_setpc_b64 s[30:31]
634 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
636 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
638 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
639 ; VI-NEXT: v_mad_f32 v0, v0, v1, |v2|
640 ; VI-NEXT: s_setpc_b64 s[30:31]
642 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
644 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
646 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
648 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
650 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
652 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
653 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
654 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
655 %src0.ext = fpext half %src0 to float
656 %src1.ext = fpext half %src1 to float
657 %src2.abs = call float @llvm.fabs.f32(float %src2)
658 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
; Mixed-precision case with both modifiers on the float addend: src2 goes
; through llvm.fabs.f32 and then fneg before the fmuladd. The checks on every
; target expect a single -|v2| source modifier on the multiply-add instruction.
662 define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
663 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
665 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
667 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
669 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
671 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
673 ; GFX900-NEXT: s_setpc_b64 s[30:31]
675 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
677 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
679 ; GFX906-NEXT: s_setpc_b64 s[30:31]
681 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
683 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
685 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
686 ; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
687 ; VI-NEXT: s_setpc_b64 s[30:31]
689 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
691 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
693 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
695 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
697 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
699 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
700 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
701 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
702 %src0.ext = fpext half %src0 to float
703 %src1.ext = fpext half %src1 to float
704 %src2.abs = call float @llvm.fabs.f32(float %src2)
705 %src2.neg.abs = fneg float %src2.abs
706 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
710 ; TODO: Fold inline immediates. Need to be careful because it is an
711 ; f16 inline immediate that may be converted to f32, not an actual f32
; Constant-addend case: src0 and src1 are halves widened with fpext and the
; addend is the float constant 1.0. On the mix-capable targets the checks show
; the constant is not folded into the mix instruction: SelectionDAG
; materializes it in an SGPR with s_mov_b32 and GlobalISel materializes it in
; a VGPR with v_mov_b32. The fiji and hawaii checks expect 1.0 to be used
; directly as an operand of v_mad_f32.
714 define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
715 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
716 ; SDAG-GFX1100: ; %bb.0:
717 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
719 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
720 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
721 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
723 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
724 ; SDAG-GFX900: ; %bb.0:
725 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
726 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
727 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
728 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
730 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
731 ; SDAG-GFX906: ; %bb.0:
732 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
734 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
735 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
737 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
739 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
741 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
742 ; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0
743 ; VI-NEXT: s_setpc_b64 s[30:31]
745 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
747 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
749 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
751 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
752 ; GISEL-GFX1100: ; %bb.0:
753 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0
755 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
756 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
757 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
759 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
760 ; GISEL-GFX900: ; %bb.0:
761 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 1.0
763 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
764 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
766 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
767 ; GISEL-GFX906: ; %bb.0:
768 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0
770 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
771 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
773 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
775 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
776 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
777 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
778 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
779 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
780 %src0.ext = fpext half %src0 to float
781 %src1.ext = fpext half %src1 to float
782 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
; Scalar case: both half arguments are fpext'd to float and passed to
; llvm.fmuladd.f32 with the f32 constant 0x3FC45F3060000000 as the addend.
; The expected output prints that constant either as 0.15915494 or as the
; literal 0x3e22f983, depending on the target and instruction selector.
786 define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
787 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
788 ; SDAG-GFX1100: ; %bb.0:
789 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
791 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
792 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
793 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
795 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
796 ; SDAG-GFX900: ; %bb.0:
797 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
799 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
800 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
802 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
803 ; SDAG-GFX906: ; %bb.0:
804 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
806 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
807 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
809 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
811 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
813 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
814 ; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
815 ; VI-NEXT: s_setpc_b64 s[30:31]
817 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
819 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983
821 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
823 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
824 ; GISEL-GFX1100: ; %bb.0:
825 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494
827 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
828 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
829 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
831 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
832 ; GISEL-GFX900: ; %bb.0:
833 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494
835 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
836 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
838 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
839 ; GISEL-GFX906: ; %bb.0:
840 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494
842 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
843 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
845 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
847 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
848 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
849 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
850 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983
851 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
852 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
853 %src0.ext = fpext half %src0 to float
854 %src1.ext = fpext half %src1 to float
855 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
859 ; Attempt to break inline immediate folding. If the operand is
860 ; interpreted as f32, the inline immediate is really the f16 inline
861 ; imm value converted to f32.
862 ; fpext f16 1/2pi = 0x3e230000
863 ; f32 1/2pi = 0x3e22f983
; Scalar case where the addend is fpext(half 0xH3118) rather than an f32
; constant. Every expected sequence below uses the 32-bit literal 0x3e230000
; for it, not the 0.15915494 inline constant.
865 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
866 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
867 ; SDAG-GFX1100: ; %bb.0:
868 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
870 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
871 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
872 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
874 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
875 ; SDAG-GFX900: ; %bb.0:
876 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
878 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
879 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
881 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
882 ; SDAG-GFX906: ; %bb.0:
883 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
885 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
886 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
888 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
890 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
892 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
893 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
894 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
896 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
898 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
899 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
900 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
902 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
903 ; GISEL-GFX1100: ; %bb.0:
904 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000
906 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
907 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
908 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
910 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
911 ; GISEL-GFX900: ; %bb.0:
912 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000
914 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
915 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
917 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
918 ; GISEL-GFX906: ; %bb.0:
919 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
920 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000
921 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
922 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
924 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
926 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
928 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
929 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000
930 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
931 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
933 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
935 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
937 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
938 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000
939 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
940 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
941 %src0.ext = fpext half %src0 to float
942 %src1.ext = fpext half %src1 to float
943 %src2 = fpext half 0xH3118 to float
944 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Scalar case where the addend is fpext(half 0xH003F). Every expected
; sequence below materializes it as the 32-bit literal 0x367c0000.
949 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
950 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
951 ; SDAG-GFX1100: ; %bb.0:
952 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000
954 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
955 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
956 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
958 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
959 ; SDAG-GFX900: ; %bb.0:
960 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000
962 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
963 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
965 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
966 ; SDAG-GFX906: ; %bb.0:
967 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000
969 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
970 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
972 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
974 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
976 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
977 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
978 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
980 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
982 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
984 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
986 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
987 ; GISEL-GFX1100: ; %bb.0:
988 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000
990 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
991 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
992 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
994 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
995 ; GISEL-GFX900: ; %bb.0:
996 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000
998 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
999 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1001 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1002 ; GISEL-GFX906: ; %bb.0:
1003 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1004 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000
1005 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1006 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1008 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1009 ; GISEL-VI: ; %bb.0:
1010 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1012 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1013 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1014 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
1015 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1017 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1018 ; GISEL-CI: ; %bb.0:
1019 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
1021 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1022 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1023 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
1024 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1025 %src0.ext = fpext half %src0 to float
1026 %src1.ext = fpext half %src1 to float
1027 %src2 = fpext half 0xH003F to float
1028 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Vector case: both <2 x half> arguments are fpext'd to <2 x float> and passed
; to llvm.fmuladd.v2f32 with a <1.0, 1.0> addend. On the targets that use
; v_mad_mix_f32 / v_fma_mix_f32 the expected output is two mix instructions,
; one of which carries op_sel:[1,1,0]; VI and CI expect two v_mad_f32 with the
; 1.0 inline constant.
1032 define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
1033 ; GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1035 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; GFX1100-NEXT: s_mov_b32 s0, 1.0
1037 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1038 ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1039 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1040 ; GFX1100-NEXT: v_mov_b32_e32 v0, v2
1041 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1043 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1044 ; SDAG-GFX900: ; %bb.0:
1045 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
1047 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1048 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1049 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1050 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1052 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1053 ; SDAG-GFX906: ; %bb.0:
1054 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
1056 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1057 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1058 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1059 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1061 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1063 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1064 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1065 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1066 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1067 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1068 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0
1069 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0
1070 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1072 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1074 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1075 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1076 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1077 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1078 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1079 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1080 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1081 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1082 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1083 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1084 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1085 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1087 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1088 ; GISEL-GFX900: ; %bb.0:
1089 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090 ; GISEL-GFX900-NEXT: s_mov_b32 s4, 1.0
1091 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1092 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1093 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1094 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1096 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1097 ; GISEL-GFX906: ; %bb.0:
1098 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099 ; GISEL-GFX906-NEXT: s_mov_b32 s4, 1.0
1100 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1101 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1102 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1103 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1105 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1106 ; GISEL-VI: ; %bb.0:
1107 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1109 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1110 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1111 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1112 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0
1113 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0
1114 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1116 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1117 ; GISEL-CI: ; %bb.0:
1118 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1120 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1121 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1122 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1123 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1124 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1125 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1126 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1127 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1128 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
1129 ret <2 x float> %result
; Vector case whose addend is fpext of the <2 x half> splat of 0xH3118.
; Every expected sequence below uses the 32-bit literal 0x3e230000 for the
; addend, either in a register or as a v_madak_f32 literal.
1132 define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1133 ; GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1135 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
1137 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1138 ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1139 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1140 ; GFX1100-NEXT: v_mov_b32_e32 v0, v2
1141 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1143 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1144 ; SDAG-GFX900: ; %bb.0:
1145 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1146 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
1147 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1148 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1149 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1150 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1152 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1153 ; SDAG-GFX906: ; %bb.0:
1154 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
1156 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1157 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1158 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1159 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1161 ; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1163 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1164 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1165 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1166 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1167 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1168 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1169 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
1170 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4
1171 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1173 ; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1175 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1177 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1178 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1179 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1180 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1181 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1182 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1183 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1184 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1185 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
1186 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1187 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1189 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1190 ; GISEL-GFX900: ; %bb.0:
1191 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192 ; GISEL-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
1193 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1194 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1195 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1196 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1198 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1199 ; GISEL-GFX906: ; %bb.0:
1200 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201 ; GISEL-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
1202 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1203 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1204 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1205 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1207 ; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1208 ; GISEL-VI: ; %bb.0:
1209 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1210 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1211 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1212 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1213 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1214 ; GISEL-VI-NEXT: s_mov_b32 s4, 0x3e230000
1215 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, s4
1216 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, s4
1217 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1219 ; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1220 ; GISEL-CI: ; %bb.0:
1221 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1223 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1224 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1225 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1226 ; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e230000
1227 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4
1228 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4
1229 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1230 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1231 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1232 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
1233 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
1234 ret <2 x float> %result
; Vector case: both <2 x half> arguments are fpext'd to <2 x float> and passed
; to llvm.fmuladd.v2f32 with a splat of the f32 constant 0x3FC45F3060000000 as
; the addend. The expected output prints that constant as 0.15915494 or as the
; literal 0x3e22f983, depending on the target and instruction selector.
; The addend is written directly as an f32 constant vector; no half-typed
; constant is involved in this function.
1237 define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1238 ; GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1240 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX1100-NEXT: s_mov_b32 s0, 0.15915494
1242 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1243 ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1244 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1245 ; GFX1100-NEXT: v_mov_b32_e32 v0, v2
1246 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1248 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1249 ; SDAG-GFX900: ; %bb.0:
1250 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1251 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
1252 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1253 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1254 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1255 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1257 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1258 ; SDAG-GFX906: ; %bb.0:
1259 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
1261 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1262 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1263 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1264 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1266 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1268 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1270 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1271 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1272 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1273 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
1274 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
1275 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1277 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1279 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1280 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1281 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1282 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1283 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1284 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1285 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1286 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1287 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1288 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
1289 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
1290 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1291 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1293 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1294 ; GISEL-GFX900: ; %bb.0:
1295 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GISEL-GFX900-NEXT: s_mov_b32 s4, 0.15915494
1297 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1298 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1299 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1300 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1302 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1303 ; GISEL-GFX906: ; %bb.0:
1304 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305 ; GISEL-GFX906-NEXT: s_mov_b32 s4, 0.15915494
1306 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
1307 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1308 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1309 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1311 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1312 ; GISEL-VI: ; %bb.0:
1313 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1314 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1315 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1316 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1317 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1318 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
1319 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
1320 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1322 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1323 ; GISEL-CI: ; %bb.0:
1324 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1326 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1327 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1328 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1329 ; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e22f983
1330 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4
1331 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4
1332 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1333 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1334 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1336 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
1337 ret <2 x float> %result
; Element 1 of each <2 x half> argument is extracted, fpext'd to float and
; combined with llvm.fmuladd.f32; the result then goes through
; llvm.maxnum.f32(..., 0.0) followed by llvm.minnum.f32(..., 1.0).
; Every expected sequence below ends in a single multiply-add instruction
; carrying the clamp modifier, with no separate min/max instructions.
1340 define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1341 ; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1343 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1344 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1345 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1347 ; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1349 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1351 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1355 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1357 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1359 ; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1361 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1362 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1363 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1364 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1365 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1366 ; VI-NEXT: s_setpc_b64 s[30:31]
1368 ; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1370 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
1372 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1374 ; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1375 ; GISEL-CI: ; %bb.0:
1376 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1
1378 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3
1379 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5
1380 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1381 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1382 %src0.hi = extractelement <2 x half> %src0, i32 1
1383 %src1.hi = extractelement <2 x half> %src1, i32 1
1384 %src2.hi = extractelement <2 x half> %src2, i32 1
1385 %src0.ext = fpext half %src0.hi to float
1386 %src1.ext = fpext half %src1.hi to float
1387 %src2.ext = fpext half %src2.hi to float
1388 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1389 %max = call float @llvm.maxnum.f32(float %result, float 0.0)
1390 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
; Plain f32 llvm.fmuladd with no half-typed inputs. The checks expect an
; ordinary v_fma_f32 on GFX1100 and GFX906 and v_mad_f32 on GFX900, VI and CI;
; no *_mix_* instruction appears for any target.
1394 define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
1395 ; GFX1100-LABEL: no_mix_simple:
1397 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2
1399 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX900-LABEL: no_mix_simple:
1403 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX900-NEXT: v_mad_f32 v0, v0, v1, v2
1405 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1407 ; GFX906-LABEL: no_mix_simple:
1409 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410 ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
1411 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1413 ; VI-LABEL: no_mix_simple:
1415 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1416 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
1417 ; VI-NEXT: s_setpc_b64 s[30:31]
1419 ; CI-LABEL: no_mix_simple:
1421 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1422 ; CI-NEXT: v_mad_f32 v0, v0, v1, v2
1423 ; CI-NEXT: s_setpc_b64 s[30:31]
1424 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
; f32 llvm.fmuladd whose first operand is llvm.fabs.f32(%src0), with no
; half-typed inputs. The checks expect the fabs to appear as the |v0| source
; modifier on a plain v_fma_f32 / v_mad_f32; no *_mix_* instruction is used.
1428 define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
1429 ; GFX1100-LABEL: no_mix_simple_fabs:
1431 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2
1433 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1435 ; GFX900-LABEL: no_mix_simple_fabs:
1437 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438 ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
1439 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1441 ; GFX906-LABEL: no_mix_simple_fabs:
1443 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444 ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
1445 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1447 ; VI-LABEL: no_mix_simple_fabs:
1449 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1451 ; VI-NEXT: s_setpc_b64 s[30:31]
1453 ; CI-LABEL: no_mix_simple_fabs:
1455 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456 ; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1457 ; CI-NEXT: s_setpc_b64 s[30:31]
1458 %src0.fabs = call float @llvm.fabs.f32(float %src0)
1459 %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
1463 ; FIXME(DAG): Should be able to select in this case.
1464 ; All sources are converted from f16, so it doesn't matter that
1465 ; v_mad_mix_f32 flushes denormals.
; All three half arguments are fpext'd to float and passed to
; llvm.fmuladd.f32. Unlike the functions using #0, this one uses attribute
; group #1 (its definition is outside this part of the file; presumably it
; enables f32 denormals, per the function name).
; Expected selection per target: GFX1100 and GFX906 use v_fma_mix_f32;
; GFX900 converts each input and uses v_fma_f32; VI converts and uses a
; separate v_mul_f32 + v_add_f32; CI uses v_fma_f32.
1467 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
1468 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1470 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1471 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1472 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1474 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1476 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1477 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1478 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1479 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1480 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1481 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1483 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1485 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1486 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1487 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1489 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1491 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1492 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1493 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1494 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1495 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1496 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1497 ; VI-NEXT: s_setpc_b64 s[30:31]
1499 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1501 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1503 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1505 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1506 ; GISEL-CI: ; %bb.0:
1507 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1509 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1510 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1511 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1512 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1513 %src0.ext = fpext half %src0 to float
1514 %src1.ext = fpext half %src1 to float
1515 %src2.ext = fpext half %src2 to float
1516 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; fmuladd with two multiplicands fpext'ed from half and an addend that is
; already f32, compiled with attribute group #1. Where a mix instruction is
; expected (gfx906, gfx1100) its op_sel_hi is [1,1,0].
1520 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
1521 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1523 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1524 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1525 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1527 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1529 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1531 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1532 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1533 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1535 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1537 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1539 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1541 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1543 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1544 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1545 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1546 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1547 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1548 ; VI-NEXT: s_setpc_b64 s[30:31]
1550 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1552 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1553 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1554 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1556 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1557 ; GISEL-CI: ; %bb.0:
1558 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1559 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1560 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1561 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1562 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1563 %src0.ext = fpext half %src0 to float
1564 %src1.ext = fpext half %src1 to float
1565 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Separate fmul and fadd (neither carries the contract flag) on operands
; fpext'ed from half, compiled with attribute group #1. Every expected output
; keeps a distinct v_mul_f32 and v_add_f32; none of them fuses the pair.
1569 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
1570 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1572 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1574 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1575 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2
1576 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1577 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1578 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1579 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1581 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1583 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1584 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1585 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1586 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1587 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1588 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1589 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1591 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1593 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1595 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1596 ; GFX906-NEXT: v_cvt_f32_f16_e32 v2, v2
1597 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1598 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1599 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1601 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1603 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1604 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1605 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1606 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1607 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1608 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1609 ; VI-NEXT: s_setpc_b64 s[30:31]
1611 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1613 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1614 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1615 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
1616 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1618 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1619 ; GISEL-CI: ; %bb.0:
1620 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1622 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1623 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1624 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1625 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
1626 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1627 %src0.ext = fpext half %src0 to float
1628 %src1.ext = fpext half %src1 to float
1629 %src2.ext = fpext half %src2 to float
1630 %mul = fmul float %src0.ext, %src1.ext
1631 %result = fadd float %mul, %src2.ext
; Separate fmul and fadd (neither carries the contract flag) where the
; multiplicands are fpext'ed from half and the addend is already f32, compiled
; with attribute group #1. Every expected output keeps a distinct v_mul_f32
; and v_add_f32.
1635 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
1636 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1638 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1640 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1641 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1642 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1643 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1644 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1646 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1648 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1650 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1651 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1652 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1653 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1655 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1657 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1658 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1659 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1660 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1661 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1662 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1664 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1666 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1668 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1669 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1670 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1671 ; VI-NEXT: s_setpc_b64 s[30:31]
1673 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1675 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1677 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
1678 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1680 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1681 ; GISEL-CI: ; %bb.0:
1682 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1684 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1685 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1686 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
1687 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1688 %src0.ext = fpext half %src0 to float
1689 %src1.ext = fpext half %src1 to float
1690 %mul = fmul float %src0.ext, %src1.ext
1691 %result = fadd float %mul, %src2
; fmul/fadd pair where both instructions carry the contract flag and all
; operands are fpext'ed from half, compiled with attribute group #0. gfx900,
; gfx906 and gfx1100 are expected to fold the pair into one mix instruction.
1695 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
1696 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1698 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1699 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1700 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1702 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1704 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1705 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1706 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1708 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1710 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1712 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1714 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1716 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1717 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
1718 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1719 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1720 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
1721 ; VI-NEXT: s_setpc_b64 s[30:31]
1723 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1725 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1727 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1729 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1730 ; GISEL-CI: ; %bb.0:
1731 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1732 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
1733 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1734 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1735 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1736 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1737 %src0.ext = fpext half %src0 to float
1738 %src1.ext = fpext half %src1 to float
1739 %src2.ext = fpext half %src2 to float
1740 %mul = fmul contract float %src0.ext, %src1.ext
1741 %result = fadd contract float %mul, %src2.ext
; fmul/fadd pair where both instructions carry the contract flag, the
; multiplicands are fpext'ed from half and the addend is already f32, compiled
; with attribute group #0. gfx900, gfx906 and gfx1100 are expected to fold the
; pair into one mix instruction whose op_sel_hi is [1,1,0].
1745 define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
1746 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1748 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1749 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1750 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1752 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1754 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1755 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1756 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1758 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1760 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1761 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1762 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1764 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1766 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1767 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1768 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1769 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
1770 ; VI-NEXT: s_setpc_b64 s[30:31]
1772 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1774 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1776 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1778 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1779 ; GISEL-CI: ; %bb.0:
1780 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1782 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1783 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1784 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1785 %src0.ext = fpext half %src0 to float
1786 %src1.ext = fpext half %src1 to float
1787 %mul = fmul contract float %src0.ext, %src1.ext
1788 %result = fadd contract float %mul, %src2
; The fneg is applied to the half value (element 0 of the bitcast i32) before
; the fpext. On gfx900, gfx906 and gfx1100 the expected mix instruction
; carries the negation as the -v0 source modifier.
1792 define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1793 ; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1795 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1796 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1797 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1799 ; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1801 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1802 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1803 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1805 ; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1807 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1808 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1809 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1811 ; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1813 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1814 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1815 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1816 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1817 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
1818 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1820 ; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1822 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1823 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1824 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
1825 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1827 ; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1828 ; GISEL-VI: ; %bb.0:
1829 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1830 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
1831 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1832 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1833 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
1834 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1836 ; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1837 ; GISEL-CI: ; %bb.0:
1838 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
1840 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1841 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1842 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1843 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1844 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1845 %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
1846 %src0.neg = fneg half %src0
1847 %src0.ext = fpext half %src0.neg to float
1848 %src1.ext = fpext half %src1 to float
1849 %src2.ext = fpext half %src2 to float
1850 ; %src0.ext.neg = fneg float %src0.ext
1851 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1855 ; Make sure we don't fold pre-cvt fneg if we already have a fabs
; Takes element 1 of the bitcast i32, negates it as half, extends it to f32
; and then applies fabs to the extended value. The gfx900, gfx906 and gfx1100
; expectations keep the negation as an explicit xor with 0x8000 and place
; only the |v0| modifier on the mix instruction.
1857 define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1858 ; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1860 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1861 ; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1862 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1863 ; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0
1864 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1865 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1867 ; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1869 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870 ; GFX900-NEXT: s_mov_b32 s4, 0x8000
1871 ; GFX900-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1872 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1873 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1875 ; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1877 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1878 ; GFX906-NEXT: s_mov_b32 s4, 0x8000
1879 ; GFX906-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1880 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1881 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1883 ; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1885 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1886 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1887 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1888 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1889 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1890 ; VI-NEXT: s_setpc_b64 s[30:31]
1892 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1894 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1895 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1896 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
1897 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1898 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1900 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1901 ; GISEL-CI: ; %bb.0:
1902 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1904 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
1905 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1906 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1907 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1908 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1909 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1910 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
1911 %src0.neg = fneg half %src0
1912 %src0.ext = fpext half %src0.neg to float
1913 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
1914 %src1.ext = fpext half %src1 to float
1915 %src2.ext = fpext half %src2 to float
1916 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; fabs is applied to the half value (element 1 of the bitcast i32) before the
; fpext. Where a mix instruction is expected (gfx900, gfx906, gfx1100) it uses
; the |v0| modifier together with op_sel [1,0,0] on the first source.
1920 define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1921 ; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1923 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1925 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1927 ; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1929 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1931 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1933 ; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1935 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1936 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1937 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1939 ; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1941 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1943 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1944 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1945 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
1946 ; VI-NEXT: s_setpc_b64 s[30:31]
1948 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1950 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1952 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
1953 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1954 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1956 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1957 ; GISEL-CI: ; %bb.0:
1958 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1959 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1960 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0|
1961 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1962 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1963 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1964 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1965 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1966 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
1967 %src0.abs = call half @llvm.fabs.f16(half %src0)
1968 %src0.ext = fpext half %src0.abs to float
1969 %src1.ext = fpext half %src1 to float
1970 %src2.ext = fpext half %src2 to float
1971 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; fneg is applied to the whole <2 x half> vector before element 1 is extracted
; and extended. On gfx900, gfx906 and gfx1100 the expected mix instruction
; carries the negation as the -v0 modifier with op_sel [1,0,0].
1975 define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1976 ; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
1978 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1979 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1980 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1982 ; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
1984 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1985 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1986 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1988 ; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
1990 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1991 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1992 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1994 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
1996 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1998 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1999 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2000 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2001 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2003 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2005 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2007 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2008 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2009 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2011 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2012 ; GISEL-VI: ; %bb.0:
2013 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2014 ; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2015 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2016 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2017 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2018 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2019 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2021 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2022 ; GISEL-CI: ; %bb.0:
2023 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2024 ; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2025 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2026 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2027 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2028 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2029 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2030 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2031 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2032 %fneg = fneg <2 x half> %src0.arg.bc
2033 %src0 = extractelement <2 x half> %fneg, i32 1
2034 %src0.ext = fpext half %src0 to float
2035 %src1.ext = fpext half %src1 to float
2036 %src2.ext = fpext half %src2 to float
2037 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; fabs is applied to the whole <2 x half> vector before element 1 is extracted
; and extended. On gfx900, gfx906 and gfx1100 the expected mix instruction
; carries it as the |v0| modifier with op_sel [1,0,0].
2041 define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2042 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2044 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2045 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2046 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2048 ; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2050 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2051 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2052 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2054 ; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2056 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2057 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2058 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2060 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2062 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2064 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2065 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2066 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2067 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2069 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2071 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2072 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2073 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2074 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2075 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2077 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2078 ; GISEL-VI: ; %bb.0:
2079 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2080 ; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2081 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2082 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2083 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2084 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2085 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2087 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2088 ; GISEL-CI: ; %bb.0:
2089 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2091 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2092 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2093 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2094 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2095 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2096 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2097 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2098 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2099 %src0 = extractelement <2 x half> %fabs, i32 1
2100 %src0.ext = fpext half %src0 to float
2101 %src1.ext = fpext half %src1 to float
2102 %src2.ext = fpext half %src2 to float
2103 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; fabs followed by fneg is applied to the whole <2 x half> vector before
; element 1 is extracted and extended. On gfx900, gfx906 and gfx1100 the
; expected mix instruction carries both as the -|v0| modifier with
; op_sel [1,0,0].
2107 define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2108 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2110 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2111 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2112 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2114 ; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2116 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2117 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2118 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2120 ; GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2122 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2124 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2126 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2128 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2129 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2130 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2131 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2132 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2133 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2135 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2137 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2138 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2139 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
2140 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2141 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2143 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2144 ; GISEL-VI: ; %bb.0:
2145 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2146 ; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2147 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2148 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2149 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2150 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2151 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2153 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2154 ; GISEL-CI: ; %bb.0:
2155 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2156 ; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2157 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2158 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2159 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2160 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2161 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2162 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2163 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2164 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2165 %fneg.fabs = fneg <2 x half> %fabs
2166 %src0 = extractelement <2 x half> %fneg.fabs, i32 1
2167 %src0.ext = fpext half %src0 to float
2168 %src1.ext = fpext half %src1 to float
2169 %src2.ext = fpext half %src2 to float
2170 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Intrinsic declarations; every one of them uses attribute group #2.
2174 declare half @llvm.fabs.f16(half) #2
2175 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
2176 declare float @llvm.fabs.f32(float) #2
2177 declare float @llvm.minnum.f32(float, float) #2
2178 declare float @llvm.maxnum.f32(float, float) #2
2179 declare float @llvm.fmuladd.f32(float, float, float) #2
2180 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
; Attribute groups. #0 and #1 differ only in "denormal-fp-math-f32", where #0
; selects preserve-sign and #1 selects ieee. #2 is the group attached to the
; intrinsic declarations.
2182 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2183 attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
2184 attributes #2 = { nounwind readnone speculatable }