1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
8 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
9 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
10 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
11 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
12 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
; Baseline case. All three operands are scalar half arguments that are
; extended to float and passed to llvm.fmuladd.f32.
; Expected output per the checks below:
;  - gfx900 selects a single v_mad_mix_f32 with op_sel_hi:[1,1,1].
;  - gfx906 and gfx1100 select a single v_fma_mix_f32 with the same modifiers.
;  - fiji, and hawaii under GlobalISel, emit three v_cvt_f32_f16 conversions
;    followed by v_mac_f32.
;  - hawaii under SelectionDAG emits only v_mad_f32, with no conversions.
14 define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
15 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
17 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
19 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
21 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
23 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
25 ; GFX900-NEXT: s_setpc_b64 s[30:31]
27 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
29 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
31 ; GFX906-NEXT: s_setpc_b64 s[30:31]
33 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
35 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
37 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
38 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
39 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
40 ; VI-NEXT: s_setpc_b64 s[30:31]
42 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
44 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
46 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
48 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
50 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
52 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
53 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
54 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
55 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Widen every half operand to float, then combine with fmuladd.
56 %src0.ext = fpext half %src0 to float
57 %src1.ext = fpext half %src1 to float
58 %src2.ext = fpext half %src2 to float
59 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; High-half operands taken from i32 arguments. Each source is shifted right
; by 16, truncated to i16, bitcast to half and extended to float before the
; llvm.fmuladd.f32 call.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) use one mix
;    instruction with op_sel:[1,1,1] op_sel_hi:[1,1,1].
;  - fiji converts with SDWA v_cvt_f32_f16 using src0_sel:WORD_1.
;  - hawaii keeps explicit v_lshrrev_b32 by 16 ahead of each conversion.
63 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
64 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
66 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
68 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
70 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
72 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
74 ; GFX900-NEXT: s_setpc_b64 s[30:31]
76 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
78 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
80 ; GFX906-NEXT: s_setpc_b64 s[30:31]
82 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
84 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
86 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
87 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
88 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
89 ; VI-NEXT: s_setpc_b64 s[30:31]
91 ; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
93 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
95 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
96 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
97 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v0
98 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
99 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v2
100 ; CI-NEXT: v_mac_f32_e32 v0, v3, v1
101 ; CI-NEXT: s_setpc_b64 s[30:31]
; Isolate bits 31..16 of each i32 source.
102 %src0.hi = lshr i32 %src0, 16
103 %src1.hi = lshr i32 %src1, 16
104 %src2.hi = lshr i32 %src2, 16
105 %src0.i16 = trunc i32 %src0.hi to i16
106 %src1.i16 = trunc i32 %src1.hi to i16
107 %src2.i16 = trunc i32 %src2.hi to i16
; Reinterpret the 16-bit payloads as half, then widen to float.
108 %src0.fp16 = bitcast i16 %src0.i16 to half
109 %src1.fp16 = bitcast i16 %src1.i16 to half
110 %src2.fp16 = bitcast i16 %src2.i16 to half
111 %src0.ext = fpext half %src0.fp16 to float
112 %src1.ext = fpext half %src1.fp16 to float
113 %src2.ext = fpext half %src2.fp16 to float
114 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; High-half operands taken as element 1 of <2 x half> arguments via
; extractelement, then extended to float and passed to llvm.fmuladd.f32.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) use one mix
;    instruction with op_sel:[1,1,1] op_sel_hi:[1,1,1].
;  - fiji converts with SDWA v_cvt_f32_f16 using src0_sel:WORD_1.
;  - hawaii reads the operands from v1, v3 and v5; SelectionDAG emits only
;    v_mad_f32, GlobalISel emits conversions followed by v_mac_f32.
118 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
119 ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
121 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
123 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
125 ; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
127 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
129 ; GFX900-NEXT: s_setpc_b64 s[30:31]
131 ; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
133 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
135 ; GFX906-NEXT: s_setpc_b64 s[30:31]
137 ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
139 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
141 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
142 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
143 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
144 ; VI-NEXT: s_setpc_b64 s[30:31]
146 ; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
148 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5
150 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
152 ; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
154 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
156 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
157 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5
158 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2
159 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Pick the second (index 1) element of every vector operand.
160 %src0.hi = extractelement <2 x half> %src0, i32 1
161 %src1.hi = extractelement <2 x half> %src1, i32 1
162 %src2.hi = extractelement <2 x half> %src2, i32 1
163 %src0.ext = fpext half %src0.hi to float
164 %src1.ext = fpext half %src1.hi to float
165 %src2.ext = fpext half %src2.hi to float
166 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Vector case. Three <2 x half> operands are extended to <2 x float> and
; passed to llvm.fmuladd.v2f32.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) emit two mix
;    instructions, one without op_sel and one with op_sel:[1,1,1], plus a
;    v_mov_b32 to place a result that was computed into v3.
;  - On gfx900 and gfx906 the two selectors differ in which result goes
;    through v3, hence the separate SelectionDAG and GlobalISel prefixes.
;  - fiji and hawaii emit per-element conversions followed by v_mac_f32.
170 define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
171 ; GFX1100-LABEL: v_mad_mix_v2f32:
173 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
175 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
176 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
177 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
178 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
180 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
181 ; SDAG-GFX900: ; %bb.0:
182 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
184 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
185 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
186 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
188 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
189 ; SDAG-GFX906: ; %bb.0:
190 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
192 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
193 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
194 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
196 ; SDAG-VI-LABEL: v_mad_mix_v2f32:
198 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
200 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0
201 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
202 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1
203 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
204 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
205 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5
206 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6
207 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
209 ; SDAG-CI-LABEL: v_mad_mix_v2f32:
211 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
213 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3
214 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
215 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
216 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5
217 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6
218 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
219 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0
220 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
221 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
222 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
223 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6
224 ; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5
225 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3
226 ; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
227 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
229 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
230 ; GISEL-GFX900: ; %bb.0:
231 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
233 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
234 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
235 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
237 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
238 ; GISEL-GFX906: ; %bb.0:
239 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
241 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
242 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
243 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
245 ; GISEL-VI-LABEL: v_mad_mix_v2f32:
247 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
249 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
250 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1
251 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
252 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
253 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
254 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5
255 ; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6
256 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
258 ; GISEL-CI-LABEL: v_mad_mix_v2f32:
260 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0
262 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1
263 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
264 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
265 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
266 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5
267 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2
268 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3
269 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Widen whole vectors, then use the two-element fmuladd intrinsic.
270 %src0.ext = fpext <2 x half> %src0 to <2 x float>
271 %src1.ext = fpext <2 x half> %src1 to <2 x float>
272 %src2.ext = fpext <2 x half> %src2 to <2 x float>
273 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
274 ret <2 x float> %result
; Vector case with shuffled inputs. Before extension, src0 has its elements
; swapped (mask <1, 0>), src1 is left in order (mask <0, 1>) and src2 has
; element 1 replicated into both lanes (mask <1, 1>).
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) emit two mix
;    instructions whose op_sel masks mirror the shuffles: op_sel:[1,0,1] for
;    the value returned in v0 and op_sel:[0,1,1] for the value in v1.
;  - fiji and hawaii emit explicit conversions and v_mad_f32 / v_mac_f32.
277 define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
278 ; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
280 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
282 ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
283 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
284 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
285 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
287 ; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
289 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
291 ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
292 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
293 ; GFX900-NEXT: s_setpc_b64 s[30:31]
295 ; GFX906-LABEL: v_mad_mix_v2f32_shuffle:
297 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
299 ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
300 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
301 ; GFX906-NEXT: s_setpc_b64 s[30:31]
303 ; VI-LABEL: v_mad_mix_v2f32_shuffle:
305 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
307 ; VI-NEXT: v_cvt_f32_f16_e32 v4, v0
308 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v1
309 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
310 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
311 ; VI-NEXT: v_mad_f32 v0, v3, v0, v2
312 ; VI-NEXT: v_mac_f32_e32 v2, v4, v1
313 ; VI-NEXT: v_mov_b32_e32 v1, v2
314 ; VI-NEXT: s_setpc_b64 s[30:31]
316 ; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
318 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
320 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5
321 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
322 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1
323 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
324 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
325 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4
326 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
327 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5
328 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
329 ; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1
330 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3
331 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
333 ; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
335 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
337 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
338 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
339 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5
340 ; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
341 ; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4
342 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
343 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
344 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
345 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
346 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
347 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
348 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
349 ; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
350 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
351 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Lane selection: src0 swapped, src1 unchanged, src2 broadcast of element 1.
352 %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
353 %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
354 %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
355 %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
356 %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
357 %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
358 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
359 ret <2 x float> %result
; Source modifier case. The first operand is negated (fneg) after being
; extended from half to float.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold the
;    negation into the mix instruction as -v0.
;  - fiji under SelectionDAG and hawaii under SelectionDAG apply -v0 on
;    v_mad_f32.
;  - GlobalISel on fiji and hawaii applies -v0 on the v_cvt_f32_f16_e64
;    conversion and then uses v_mac_f32.
362 define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
363 ; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
365 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
367 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
369 ; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
371 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
373 ; GFX900-NEXT: s_setpc_b64 s[30:31]
375 ; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
377 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
379 ; GFX906-NEXT: s_setpc_b64 s[30:31]
381 ; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
383 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
385 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
386 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
387 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
388 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
390 ; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
392 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
394 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
396 ; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
398 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
400 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
401 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
402 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
403 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
405 ; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
407 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
409 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
410 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
411 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
412 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
413 %src0.ext = fpext half %src0 to float
414 %src1.ext = fpext half %src1 to float
415 %src2.ext = fpext half %src2 to float
; The negation is applied to the already-extended float value.
416 %src0.ext.neg = fneg float %src0.ext
417 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
; Source modifier case. llvm.fabs.f32 is applied to the first operand after
; it has been extended from half to float.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold the
;    absolute value into the mix instruction as |v0|.
;  - fiji and hawaii apply |v0| on v_mad_f32; hawaii under SelectionDAG
;    shows no conversions.
421 define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
422 ; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
424 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
426 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
428 ; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
430 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
432 ; GFX900-NEXT: s_setpc_b64 s[30:31]
434 ; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
436 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
438 ; GFX906-NEXT: s_setpc_b64 s[30:31]
440 ; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
442 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
444 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
445 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
446 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
447 ; VI-NEXT: s_setpc_b64 s[30:31]
449 ; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
451 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452 ; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
453 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
455 ; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
457 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
458 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
459 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
460 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
461 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
462 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
463 %src0.ext = fpext half %src0 to float
464 %src1.ext = fpext half %src1 to float
465 %src2.ext = fpext half %src2 to float
; The absolute value is taken on the already-extended float value.
466 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
467 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; Source modifier case. The first operand is extended from half to float,
; passed through llvm.fabs.f32 and then negated.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold both
;    modifiers into the mix instruction as -|v0|.
;  - fiji and hawaii apply -|v0| on v_mad_f32; hawaii under SelectionDAG
;    shows no conversions.
471 define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
472 ; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
474 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
476 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
478 ; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
480 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
482 ; GFX900-NEXT: s_setpc_b64 s[30:31]
484 ; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
486 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
488 ; GFX906-NEXT: s_setpc_b64 s[30:31]
490 ; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
492 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
494 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
495 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
496 ; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
497 ; VI-NEXT: s_setpc_b64 s[30:31]
499 ; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
501 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502 ; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
503 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
505 ; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
507 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
509 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
510 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
511 ; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
512 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
513 %src0.ext = fpext half %src0 to float
514 %src1.ext = fpext half %src1 to float
515 %src2.ext = fpext half %src2 to float
; Absolute value first, negation second, both on the extended float value.
516 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
517 %src0.ext.neg.abs = fneg float %src0.ext.abs
518 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
; Mixed-precision case. The two multiplicands are half values extended to
; float, while the addend is already a float argument.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) use one mix
;    instruction with op_sel_hi:[1,1,0], i.e. the third bit is cleared for
;    the float addend.
;  - fiji and hawaii use v_mad_f32; only the two half operands are converted
;    (hawaii under SelectionDAG shows no conversions at all).
522 define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
523 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
525 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
527 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
529 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
531 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
533 ; GFX900-NEXT: s_setpc_b64 s[30:31]
535 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
537 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
539 ; GFX906-NEXT: s_setpc_b64 s[30:31]
541 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
543 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
545 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
546 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
547 ; VI-NEXT: s_setpc_b64 s[30:31]
549 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
551 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
553 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
555 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
557 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
559 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
560 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
561 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Only the multiplicands need widening; %src2 is used as-is.
562 %src0.ext = fpext half %src0 to float
563 %src1.ext = fpext half %src1 to float
564 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Mixed-precision case with a negated float addend. The multiplicands are
; half values extended to float; the float addend goes through fneg.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold the
;    negation as -v2 with op_sel_hi:[1,1,0].
;  - fiji and hawaii apply -v2 on v_mad_f32.
568 define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
569 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
571 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
573 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
575 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
577 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
581 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
583 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
585 ; GFX906-NEXT: s_setpc_b64 s[30:31]
587 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
589 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
591 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
592 ; VI-NEXT: v_mad_f32 v0, v0, v1, -v2
593 ; VI-NEXT: s_setpc_b64 s[30:31]
595 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
597 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
599 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
601 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
603 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
605 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
606 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
607 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
608 %src0.ext = fpext half %src0 to float
609 %src1.ext = fpext half %src1 to float
; Negate the float addend directly; it is never a half value.
610 %src2.neg = fneg float %src2
611 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
; Mixed-precision case with llvm.fabs.f32 applied to the float addend. The
; multiplicands are half values extended to float.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold the
;    absolute value as |v2| with op_sel_hi:[1,1,0].
;  - fiji and hawaii apply |v2| on v_mad_f32.
615 define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
616 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
618 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
620 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
622 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
624 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
626 ; GFX900-NEXT: s_setpc_b64 s[30:31]
628 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
630 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
632 ; GFX906-NEXT: s_setpc_b64 s[30:31]
634 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
636 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
638 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
639 ; VI-NEXT: v_mad_f32 v0, v0, v1, |v2|
640 ; VI-NEXT: s_setpc_b64 s[30:31]
642 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
644 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
646 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
648 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
650 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
652 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
653 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
654 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
655 %src0.ext = fpext half %src0 to float
656 %src1.ext = fpext half %src1 to float
; Absolute value of the float addend; it is never a half value.
657 %src2.abs = call float @llvm.fabs.f32(float %src2)
658 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
; Mixed-precision case where the float addend goes through llvm.fabs.f32
; and is then negated. The multiplicands are half values extended to float.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) fold both
;    modifiers as -|v2| with op_sel_hi:[1,1,0].
;  - fiji and hawaii apply -|v2| on v_mad_f32.
662 define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
663 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
665 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
667 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
669 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
671 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
673 ; GFX900-NEXT: s_setpc_b64 s[30:31]
675 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
677 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
679 ; GFX906-NEXT: s_setpc_b64 s[30:31]
681 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
683 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
685 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
686 ; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
687 ; VI-NEXT: s_setpc_b64 s[30:31]
689 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
691 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
693 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
695 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
697 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
699 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
700 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
701 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
702 %src0.ext = fpext half %src0 to float
703 %src1.ext = fpext half %src1 to float
; Absolute value first, negation second, both on the float addend.
704 %src2.abs = call float @llvm.fabs.f32(float %src2)
705 %src2.neg.abs = fneg float %src2.abs
706 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
710 ; TODO: Fold inline immediates. Need to be careful because it is an
711 ; f16 inline immediate that may be converted to f32, not an actual f32 inline immediate.
; Mixed-precision case with a constant float addend of 1.0. The
; multiplicands are half values extended to float.
; Expected output per the checks below:
;  - gfx900 (v_mad_mix_f32) and gfx906/gfx1100 (v_fma_mix_f32) do not use
;    1.0 as an inline operand of the mix instruction. SelectionDAG first
;    moves it into an SGPR with s_mov_b32 and GlobalISel into a VGPR with
;    v_mov_b32, which is why those targets need separate prefixes.
;  - fiji and hawaii use 1.0 directly as the third operand of v_mad_f32.
714 define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
715 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
716 ; SDAG-GFX1100: ; %bb.0:
717 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
719 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
720 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
721 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
723 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
724 ; SDAG-GFX900: ; %bb.0:
725 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
726 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
727 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
728 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
730 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
731 ; SDAG-GFX906: ; %bb.0:
732 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
734 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
735 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
737 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
739 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
741 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
742 ; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0
743 ; VI-NEXT: s_setpc_b64 s[30:31]
745 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
747 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
749 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
751 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
752 ; GISEL-GFX1100: ; %bb.0:
753 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0
755 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
756 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
757 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
759 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
760 ; GISEL-GFX900: ; %bb.0:
761 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 1.0
763 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
764 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
766 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
767 ; GISEL-GFX906: ; %bb.0:
768 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0
770 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
771 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
773 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
775 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
776 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
777 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
778 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
779 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; The addend is the literal float constant 1.0 rather than an argument.
780 %src0.ext = fpext half %src0 to float
781 %src1.ext = fpext half %src1 to float
782 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
; fmuladd of two f16 sources extended to f32, with the f32 constant 1/(2*pi)
; as the addend. The checks expect the mix instructions to take the constant
; from a register, while the v_mad_f32/v_madak_f32/v_mac_f32 fallbacks use it
; as an inline constant or as the literal 0x3e22f983.
786 define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
787 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
788 ; SDAG-GFX1100: ; %bb.0:
789 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
791 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
792 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
793 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
795 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
796 ; SDAG-GFX900: ; %bb.0:
797 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
799 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
800 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
802 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
803 ; SDAG-GFX906: ; %bb.0:
804 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
806 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
807 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
809 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
811 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
813 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
814 ; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
815 ; VI-NEXT: s_setpc_b64 s[30:31]
817 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
819 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983
821 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
823 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
824 ; GISEL-GFX1100: ; %bb.0:
825 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494
827 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
828 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
829 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
831 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
832 ; GISEL-GFX900: ; %bb.0:
833 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494
835 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
836 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
838 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
839 ; GISEL-GFX906: ; %bb.0:
840 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494
842 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
843 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
845 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
847 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
848 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
849 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
850 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983
851 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
852 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
853 %src0.ext = fpext half %src0 to float
854 %src1.ext = fpext half %src1 to float
; 0x3FC45F3060000000 is the f32 value 1/(2*pi); it appears as 0x3e22f983 or
; 0.15915494 in the checks above.
855 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
859 ; Attempt to break inline immediate folding. If the operand is
860 ; interpreted as f32, the inline immediate is really the f16 inline
861 ; imm value converted to f32.
862 ; fpext f16 1/2pi = 0x3e230000
863 ; f32 1/2pi = 0x3e22f983
; Addend is 1/(2*pi) written as an f16 constant and extended to f32. Every
; target is expected to materialize the extended value 0x3e230000 as a
; literal rather than use the 1/(2*pi) inline constant.
865 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
866 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
867 ; SDAG-GFX1100: ; %bb.0:
868 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
870 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
871 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
872 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
874 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
875 ; SDAG-GFX900: ; %bb.0:
876 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
878 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
879 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
881 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
882 ; SDAG-GFX906: ; %bb.0:
883 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
885 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
886 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
888 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
890 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
892 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
893 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
894 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
896 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
898 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
899 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
900 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
902 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
903 ; GISEL-GFX1100: ; %bb.0:
904 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000
906 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
907 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
908 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
910 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
911 ; GISEL-GFX900: ; %bb.0:
912 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000
914 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
915 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
917 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
918 ; GISEL-GFX906: ; %bb.0:
919 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
920 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000
921 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
922 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
924 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
926 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
928 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
929 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000
930 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
931 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
933 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
935 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
937 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
938 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000
939 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
940 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
941 %src0.ext = fpext half %src0 to float
942 %src1.ext = fpext half %src1 to float
; 0xH3118 is the f16 encoding of 1/(2*pi); extended to f32 it becomes
; 0x3e230000, which differs from the f32 1/(2*pi) value 0x3e22f983.
943 %src2 = fpext half 0xH3118 to float
944 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Addend is the f16 bit pattern 0x003F (the integer 63) extended to f32.
; Every target is expected to use the extended value 0x367c0000 as a literal.
949 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
950 ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
951 ; SDAG-GFX1100: ; %bb.0:
952 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000
954 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
955 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
956 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
958 ; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
959 ; SDAG-GFX900: ; %bb.0:
960 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000
962 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
963 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
965 ; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
966 ; SDAG-GFX906: ; %bb.0:
967 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000
969 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
970 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
972 ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
974 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
976 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
977 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
978 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
980 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
982 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
984 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
986 ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
987 ; GISEL-GFX1100: ; %bb.0:
988 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000
990 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
991 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
992 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
994 ; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
995 ; GISEL-GFX900: ; %bb.0:
996 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000
998 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
999 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1001 ; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1002 ; GISEL-GFX906: ; %bb.0:
1003 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1004 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000
1005 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1006 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1008 ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1009 ; GISEL-VI: ; %bb.0:
1010 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1012 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1013 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1014 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
1015 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1017 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1018 ; GISEL-CI: ; %bb.0:
1019 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
1021 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1022 ; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000
1023 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
1024 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1025 %src0.ext = fpext half %src0 to float
1026 %src1.ext = fpext half %src1 to float
; 0xH003F extended to f32 is the 0x367c0000 literal seen in the checks above.
1027 %src2 = fpext half 0xH003F to float
1028 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Vector form: both <2 x half> sources are extended to <2 x float> and the
; addend is a splat of f32 1.0. On targets with mix instructions the checks
; expect one v_mad_mix_f32/v_fma_mix_f32 per lane, one of them carrying
; op_sel:[1,1,0]; the other targets convert each half and use v_mad_f32.
1032 define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
1033 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1034 ; SDAG-GFX1100: ; %bb.0:
1035 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
1037 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1038 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1039 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1040 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1041 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1043 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1044 ; SDAG-GFX900: ; %bb.0:
1045 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
1047 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1048 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1049 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1050 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1052 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1053 ; SDAG-GFX906: ; %bb.0:
1054 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
1056 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1057 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1058 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1059 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1061 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1063 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1064 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1065 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1066 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1067 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1068 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0
1069 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0
1070 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1072 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1074 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1075 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1076 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1077 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1078 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1079 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1080 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1081 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1082 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1083 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1084 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1085 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1087 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1088 ; GISEL-GFX1100: ; %bb.0:
1089 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 1.0
1091 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1092 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1093 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1094 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1095 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1097 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1098 ; GISEL-GFX900: ; %bb.0:
1099 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 1.0
1101 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1102 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1103 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1104 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1106 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1107 ; GISEL-GFX906: ; %bb.0:
1108 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 1.0
1110 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1111 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1112 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1113 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1115 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1116 ; GISEL-VI: ; %bb.0:
1117 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1119 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1120 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1121 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1122 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0
1123 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0
1124 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1126 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1127 ; GISEL-CI: ; %bb.0:
1128 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1130 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1131 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1132 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1133 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
1134 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
1135 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1136 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1137 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1138 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
1139 ret <2 x float> %result
; Vector form of the cvtf16imminv2pi case: the addend is a splat of the f16
; constant 0xH3118 (1/(2*pi) in f16) extended to f32. Every target is expected
; to use the extended value 0x3e230000 as a literal.
1142 define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1143 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1144 ; SDAG-GFX1100: ; %bb.0:
1145 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1146 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
1147 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1148 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1149 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1150 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1151 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1153 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1154 ; SDAG-GFX900: ; %bb.0:
1155 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1156 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
1157 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1158 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1159 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1160 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1162 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1163 ; SDAG-GFX906: ; %bb.0:
1164 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
1166 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1167 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1168 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1169 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1171 ; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1173 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1174 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1175 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1176 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1177 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1178 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1179 ; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
1180 ; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4
1181 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1183 ; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1185 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1187 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1188 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1189 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1190 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1191 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1192 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1193 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1194 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1195 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
1196 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1197 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1199 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1200 ; GISEL-GFX1100: ; %bb.0:
1201 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0x3e230000
1203 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1204 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1205 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1206 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1207 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1209 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1210 ; GISEL-GFX900: ; %bb.0:
1211 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0x3e230000
1213 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1214 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1215 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1216 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1218 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1219 ; GISEL-GFX906: ; %bb.0:
1220 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1221 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0x3e230000
1222 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1223 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1224 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1225 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1227 ; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1228 ; GISEL-VI: ; %bb.0:
1229 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1231 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1232 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1233 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1234 ; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1235 ; GISEL-VI-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
1236 ; GISEL-VI-NEXT: v_mac_f32_e32 v1, v3, v4
1237 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1239 ; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1240 ; GISEL-CI: ; %bb.0:
1241 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1243 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1244 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1245 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1246 ; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
1247 ; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
1248 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1249 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1250 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1251 %src1.ext = fpext <2 x half> %src1 to <2 x float>
; Splat of f16 1/(2*pi); each lane extends to the f32 bit pattern 0x3e230000.
1252 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
1253 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
1254 ret <2 x float> %result
; Vector form of the f32imminv2pi case: both <2 x half> sources are extended
; to <2 x float> and the addend is a splat of the f32 constant 1/(2*pi)
; (0x3e22f983). The checks expect the mix instructions to take the constant
; from a register, while the v_mad_f32/v_madak_f32/v_mac_f32 fallbacks use it
; as an inline constant or literal.
1257 define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1258 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1259 ; SDAG-GFX1100: ; %bb.0:
1260 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
1262 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1263 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1264 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1265 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1266 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1268 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1269 ; SDAG-GFX900: ; %bb.0:
1270 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1271 ; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
1272 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1273 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1274 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
1275 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1277 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1278 ; SDAG-GFX906: ; %bb.0:
1279 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1280 ; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
1281 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1282 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1283 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
1284 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1286 ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1288 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1290 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1291 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
1292 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1293 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
1294 ; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
1295 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1297 ; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1299 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1300 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1301 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1302 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1303 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1304 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1305 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1306 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1307 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1308 ; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
1309 ; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
1310 ; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1311 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1313 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1314 ; GISEL-GFX1100: ; %bb.0:
1315 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0.15915494
1317 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1318 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1319 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1320 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
1321 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1323 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1324 ; GISEL-GFX900: ; %bb.0:
1325 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1326 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0.15915494
1327 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1328 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1329 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
1330 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1332 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1333 ; GISEL-GFX906: ; %bb.0:
1334 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0.15915494
1336 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1337 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1338 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
1339 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1341 ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1342 ; GISEL-VI: ; %bb.0:
1343 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1344 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
1345 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1346 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
1347 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1348 ; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
1349 ; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
1350 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1352 ; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1353 ; GISEL-CI: ; %bb.0:
1354 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1356 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
1357 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1358 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1359 ; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
1360 ; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
1361 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
1362 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1363 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1364 %src1.ext = fpext <2 x half> %src1 to <2 x float>
; The addend is the f32 splat constant itself (0x3FC45F3060000000 is f32
; 1/(2*pi)); no f16 constant is extended in this variant.
1366 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
1367 ret <2 x float> %result
; All three operands are element 1 of a <2 x half> vector, extended to f32.
; The result goes through maxnum(x, 0.0) and minnum(x, 1.0); the checks expect
; that pair to become the clamp modifier on the multiply-add, and the mix
; instructions to read the operands with op_sel:[1,1,1].
1370 define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1371 ; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1373 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1375 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1377 ; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1379 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1381 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1383 ; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1385 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1387 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1389 ; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1391 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1393 ; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1394 ; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1395 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1396 ; VI-NEXT: s_setpc_b64 s[30:31]
1398 ; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1400 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401 ; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
1402 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1404 ; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1405 ; GISEL-CI: ; %bb.0:
1406 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1
1408 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3
1409 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5
1410 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1411 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; Take the high (index 1) half of each source vector.
1412 %src0.hi = extractelement <2 x half> %src0, i32 1
1413 %src1.hi = extractelement <2 x half> %src1, i32 1
1414 %src2.hi = extractelement <2 x half> %src2, i32 1
1415 %src0.ext = fpext half %src0.hi to float
1416 %src1.ext = fpext half %src1.hi to float
1417 %src2.ext = fpext half %src2.hi to float
1418 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Clamp the result to [0.0, 1.0].
1419 %max = call float @llvm.maxnum.f32(float %result, float 0.0)
1420 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
; Negative test: a plain f32 fmuladd with no f16 sources. The checks expect
; an ordinary v_fma_f32 or v_mad_f32 and no mix instruction on any target.
1424 define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
1425 ; GFX1100-LABEL: no_mix_simple:
1427 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1428 ; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2
1429 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1431 ; GFX900-LABEL: no_mix_simple:
1433 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1434 ; GFX900-NEXT: v_mad_f32 v0, v0, v1, v2
1435 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1437 ; GFX906-LABEL: no_mix_simple:
1439 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1440 ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
1441 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1443 ; VI-LABEL: no_mix_simple:
1445 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
1447 ; VI-NEXT: s_setpc_b64 s[30:31]
1449 ; CI-LABEL: no_mix_simple:
1451 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452 ; CI-NEXT: v_mad_f32 v0, v0, v1, v2
1453 ; CI-NEXT: s_setpc_b64 s[30:31]
1454 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
; Negative test for mix selection with a source modifier: all operands are
; already float and src0 goes through llvm.fabs.f32. The checks expect the fabs
; to appear as the |v0| modifier on a plain v_fma_f32 or v_mad_f32.
1458 define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
1459 ; GFX1100-LABEL: no_mix_simple_fabs:
1461 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1462 ; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2
1463 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1465 ; GFX900-LABEL: no_mix_simple_fabs:
1467 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1468 ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
1469 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1471 ; GFX906-LABEL: no_mix_simple_fabs:
1473 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474 ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
1475 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1477 ; VI-LABEL: no_mix_simple_fabs:
1479 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1480 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1481 ; VI-NEXT: s_setpc_b64 s[30:31]
1483 ; CI-LABEL: no_mix_simple_fabs:
1485 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1486 ; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1487 ; CI-NEXT: s_setpc_b64 s[30:31]
1488 %src0.fabs = call float @llvm.fabs.f32(float %src0)
1489 %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
1493 ; FIXME(DAG): Should be able to select in this case.
1494 ; All sources are converted from f16, so it doesn't matter that
1495 ; v_mad_mix_f32 flushes.
; This function uses attribute group #1 ("denormal-fp-math-f32"="ieee,ieee").
; With the current checks the gfx900 run expands to three v_cvt_f32_f16 plus
; v_fma_f32, while the gfx906 and gfx1100 runs select v_fma_mix_f32.
1497 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
1498 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1500 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1501 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1502 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1504 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1506 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1508 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1509 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1510 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1511 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1513 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1515 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1517 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1519 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1521 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1523 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1524 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1525 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1526 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1527 ; VI-NEXT: s_setpc_b64 s[30:31]
1529 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1531 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1533 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1535 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1536 ; GISEL-CI: ; %bb.0:
1537 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1539 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1540 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1541 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1542 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1543 %src0.ext = fpext half %src0 to float
1544 %src1.ext = fpext half %src1 to float
1545 %src2.ext = fpext half %src2 to float
1546 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; llvm.fmuladd under attribute group #1 ("denormal-fp-math-f32"="ieee,ieee")
; where only src0 and src1 are extended from half; src2 is passed as a float.
; Where a mix instruction is selected the checks expect op_sel_hi:[1,1,0].
1550 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
1551 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1553 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1555 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1557 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1559 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1560 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1561 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1562 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
1563 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1565 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1567 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1568 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1569 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1571 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1573 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1575 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1576 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1577 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1578 ; VI-NEXT: s_setpc_b64 s[30:31]
1580 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1582 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1584 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1586 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1587 ; GISEL-CI: ; %bb.0:
1588 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1589 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1590 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1591 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
1592 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1593 %src0.ext = fpext half %src0 to float
1594 %src1.ext = fpext half %src1 to float
1595 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; Separate fmul and fadd (no contract flag) on three half inputs extended to
; float, under attribute group #1 ("denormal-fp-math-f32"="ieee,ieee").
; The checks expect no fusion on any target: v_mul_f32 followed by v_add_f32.
1599 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
1600 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1602 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1603 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1604 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1605 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2
1606 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1607 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1608 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1609 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1611 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1613 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1614 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1615 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1616 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
1617 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1618 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1619 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1621 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1623 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1624 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1625 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1626 ; GFX906-NEXT: v_cvt_f32_f16_e32 v2, v2
1627 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1628 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1629 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1631 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1633 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1634 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1635 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1636 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1637 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1638 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1639 ; VI-NEXT: s_setpc_b64 s[30:31]
1641 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1643 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1645 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
1646 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1648 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1649 ; GISEL-CI: ; %bb.0:
1650 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1652 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1653 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1654 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1655 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
1656 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1657 %src0.ext = fpext half %src0 to float
1658 %src1.ext = fpext half %src1 to float
1659 %src2.ext = fpext half %src2 to float
1660 %mul = fmul float %src0.ext, %src1.ext
1661 %result = fadd float %mul, %src2.ext
; Separate fmul and fadd (no contract flag) under attribute group #1
; ("denormal-fp-math-f32"="ieee,ieee"); src0 and src1 are extended from half
; and src2 is already a float. The checks expect no fusion on any target:
; v_mul_f32 followed by v_add_f32.
1665 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
1666 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1668 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1669 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
1670 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
1671 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1672 ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
1673 ; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
1674 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1676 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1678 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1680 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
1681 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
1682 ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
1683 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1685 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1687 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1688 ; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
1689 ; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
1690 ; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
1691 ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
1692 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1694 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1696 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1697 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1698 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1699 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
1700 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
1701 ; VI-NEXT: s_setpc_b64 s[30:31]
1703 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1705 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1706 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1707 ; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
1708 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1710 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1711 ; GISEL-CI: ; %bb.0:
1712 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1714 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1715 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
1716 ; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
1717 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1718 %src0.ext = fpext half %src0 to float
1719 %src1.ext = fpext half %src1 to float
1720 %mul = fmul float %src0.ext, %src1.ext
1721 %result = fadd float %mul, %src2
; fmul contract + fadd contract on three half inputs extended to float, under
; attribute group #0 ("denormal-fp-math-f32"="preserve-sign,preserve-sign").
; The checks expect the pair to be fused into one multiply-add: a mix
; instruction in the gfx900, gfx906 and gfx1100 runs, and v_mac_f32 or
; v_mad_f32 in the fiji and hawaii runs.
1725 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
1726 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1728 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1730 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1732 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1734 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1736 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1738 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1740 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1742 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1744 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1746 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747 ; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
1748 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1749 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1750 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
1751 ; VI-NEXT: s_setpc_b64 s[30:31]
1753 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1755 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1756 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1757 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1759 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
1760 ; GISEL-CI: ; %bb.0:
1761 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
1763 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1764 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1765 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1766 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1767 %src0.ext = fpext half %src0 to float
1768 %src1.ext = fpext half %src1 to float
1769 %src2.ext = fpext half %src2 to float
1770 %mul = fmul contract float %src0.ext, %src1.ext
1771 %result = fadd contract float %mul, %src2.ext
; fmul contract + fadd contract under attribute group #0
; ("denormal-fp-math-f32"="preserve-sign,preserve-sign"); src0 and src1 are
; extended from half and src2 is already a float. The checks expect a fused
; multiply-add: a mix instruction with op_sel_hi:[1,1,0] in the gfx900, gfx906
; and gfx1100 runs, and v_mad_f32 in the fiji and hawaii runs.
1775 define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
1776 ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1778 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1779 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1780 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1782 ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1784 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1785 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1786 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1788 ; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1790 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1791 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1792 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1794 ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1796 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1797 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1798 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1799 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2
1800 ; VI-NEXT: s_setpc_b64 s[30:31]
1802 ; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1804 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1805 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1806 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1808 ; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
1809 ; GISEL-CI: ; %bb.0:
1810 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1812 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1813 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1814 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1815 %src0.ext = fpext half %src0 to float
1816 %src1.ext = fpext half %src1 to float
1817 %mul = fmul contract float %src0.ext, %src1.ext
1818 %result = fadd contract float %mul, %src2
; src0 is element 0 of the i32 argument reinterpreted as <2 x half>; it is
; negated with a half-typed fneg before the fpext. In the gfx900, gfx906 and
; gfx1100 runs the checks expect that fneg as the -v0 source modifier on the
; mix instruction.
1822 define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1823 ; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1825 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1827 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1829 ; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1831 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1833 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1835 ; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1837 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
1839 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1841 ; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1843 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1844 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1845 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1846 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1847 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
1848 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1850 ; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1852 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1853 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1854 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
1855 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1857 ; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1858 ; GISEL-VI: ; %bb.0:
1859 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860 ; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
1861 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1862 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1863 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
1864 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1866 ; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
1867 ; GISEL-CI: ; %bb.0:
1868 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1869 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
1870 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1871 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1872 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1873 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1874 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1875 %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
1876 %src0.neg = fneg half %src0
1877 %src0.ext = fpext half %src0.neg to float
1878 %src1.ext = fpext half %src1 to float
1879 %src2.ext = fpext half %src2 to float
1880 ; %src0.ext.neg = fneg float %src0.ext
1881 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1885 ; Make sure we don't fold pre-cvt fneg if we already have a fabs
; src0 is element 1 of the i32 argument reinterpreted as <2 x half>. It is
; negated as a half, extended to float, and then llvm.fabs.f32 is applied to
; the extended value before the fmuladd.
1887 define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1888 ; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1890 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891 ; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1892 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1893 ; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0
1894 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1895 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1897 ; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1899 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900 ; GFX900-NEXT: s_mov_b32 s4, 0x8000
1901 ; GFX900-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1902 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1903 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1905 ; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1907 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1908 ; GFX906-NEXT: s_mov_b32 s4, 0x8000
1909 ; GFX906-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1910 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
1911 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1913 ; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1915 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1917 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1918 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1919 ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1920 ; VI-NEXT: s_setpc_b64 s[30:31]
1922 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1924 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1926 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
1927 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1928 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1930 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
1931 ; GISEL-CI: ; %bb.0:
1932 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1933 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1934 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
1935 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1936 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1937 ; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
1938 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1939 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1940 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
1941 %src0.neg = fneg half %src0
1942 %src0.ext = fpext half %src0.neg to float
1943 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
1944 %src1.ext = fpext half %src1 to float
1945 %src2.ext = fpext half %src2 to float
1946 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
; src0 is element 1 of the i32 argument reinterpreted as <2 x half>;
; llvm.fabs.f16 is applied to it before the fpext. In the gfx900, gfx906 and
; gfx1100 runs the checks expect the fabs as the |v0| modifier together with
; op_sel:[1,0,0] on the mix instruction.
1950 define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
1951 ; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1953 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1954 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1955 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1957 ; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1959 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1961 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1963 ; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1965 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
1967 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1969 ; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1971 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972 ; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1973 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1974 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
1975 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1
1976 ; VI-NEXT: s_setpc_b64 s[30:31]
1978 ; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1980 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1982 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
1983 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
1984 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1986 ; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
1987 ; GISEL-CI: ; %bb.0:
1988 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1989 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1990 ; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0|
1991 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1992 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
1993 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
1994 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1995 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
1996 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
1997 %src0.abs = call half @llvm.fabs.f16(half %src0)
1998 %src0.ext = fpext half %src0.abs to float
1999 %src1.ext = fpext half %src1 to float
2000 %src2.ext = fpext half %src2 to float
2001 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; The fneg is applied to the whole <2 x half> vector before element 1 is
; extracted and extended. In the gfx900, gfx906 and gfx1100 runs the checks
; expect it as the -v0 modifier with op_sel:[1,0,0] on the mix instruction;
; the GlobalISel fiji and hawaii checks keep an explicit v_xor_b32 with
; 0x80008000.
2005 define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2006 ; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2008 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2009 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2010 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2012 ; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2014 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2016 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2018 ; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2020 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2021 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2022 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2024 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2026 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2027 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2028 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2029 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2030 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2031 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2033 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2035 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2036 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2037 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2038 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2039 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2041 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2042 ; GISEL-VI: ; %bb.0:
2043 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044 ; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2045 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2046 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2047 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2048 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2049 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2051 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2052 ; GISEL-CI: ; %bb.0:
2053 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2054 ; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2055 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2056 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2057 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2058 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2059 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2060 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2061 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2062 %fneg = fneg <2 x half> %src0.arg.bc
2063 %src0 = extractelement <2 x half> %fneg, i32 1
2064 %src0.ext = fpext half %src0 to float
2065 %src1.ext = fpext half %src1 to float
2066 %src2.ext = fpext half %src2 to float
2067 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; llvm.fabs.v2f16 is applied to the whole <2 x half> vector before element 1
; is extracted and extended. In the gfx900, gfx906 and gfx1100 runs the checks
; expect it as the |v0| modifier with op_sel:[1,0,0] on the mix instruction;
; the GlobalISel fiji and hawaii checks keep an explicit v_and_b32 with
; 0x7fff7fff.
2071 define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2072 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2074 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2075 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2076 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2078 ; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2080 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2081 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2082 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2084 ; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2086 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2087 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2088 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2090 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2092 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2093 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2094 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2095 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2096 ; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2097 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2099 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2101 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2102 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2103 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2104 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2105 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2107 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2108 ; GISEL-VI: ; %bb.0:
2109 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2110 ; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2111 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2112 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2113 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2114 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2115 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2117 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2118 ; GISEL-CI: ; %bb.0:
2119 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2120 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2121 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2122 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2123 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2124 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2125 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2126 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2127 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2128 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2129 %src0 = extractelement <2 x half> %fabs, i32 1
2130 %src0.ext = fpext half %src0 to float
2131 %src1.ext = fpext half %src1 to float
2132 %src2.ext = fpext half %src2 to float
2133 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; fneg(fabs(x)) is applied to the whole <2 x half> vector before element 1 is
; extracted and extended. In the gfx900, gfx906 and gfx1100 runs the checks
; expect the combined -|v0| modifier with op_sel:[1,0,0] on the mix
; instruction; the GlobalISel fiji and hawaii checks keep an explicit
; v_or_b32 with 0x80008000.
2137 define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2138 ; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2140 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2142 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2144 ; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2147 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2148 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2150 ; GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2152 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2153 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2154 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2156 ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2158 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2160 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2161 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2162 ; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
2163 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2165 ; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2167 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2168 ; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2169 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
2170 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
2171 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2173 ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2174 ; GISEL-VI: ; %bb.0:
2175 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2176 ; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2177 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2178 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2179 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
2180 ; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
2181 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2183 ; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2184 ; GISEL-CI: ; %bb.0:
2185 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186 ; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2187 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2188 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
2189 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2190 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
2191 ; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
2192 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2193 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2194 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2195 %fneg.fabs = fneg <2 x half> %fabs
2196 %src0 = extractelement <2 x half> %fneg.fabs, i32 1
2197 %src0.ext = fpext half %src0 to float
2198 %src1.ext = fpext half %src1 to float
2199 %src2.ext = fpext half %src2 to float
2200 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
; Declarations of the intrinsics called by the test functions.
2204 declare half @llvm.fabs.f16(half) #2
2205 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
2206 declare float @llvm.fabs.f32(float) #2
2207 declare float @llvm.minnum.f32(float, float) #2
2208 declare float @llvm.maxnum.f32(float, float) #2
2209 declare float @llvm.fmuladd.f32(float, float, float) #2
2210 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
; Attribute groups. #0 and #1 differ only in the f32 denormal mode
; (preserve-sign for #0, ieee for #1); #2 is the group attached to the
; intrinsic declarations above.
2212 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2213 attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
2214 attributes #2 = { nounwind readnone speculatable }