1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
5 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
6 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-CI %s
8 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
9 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
10 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
11 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
12 ; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-CI %s
14 define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
15 ; GFX1100-LABEL: mixlo_simple:
17 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
19 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
21 ; GFX900-LABEL: mixlo_simple:
23 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2
25 ; GFX900-NEXT: s_setpc_b64 s[30:31]
27 ; GFX906-LABEL: mixlo_simple:
29 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
31 ; GFX906-NEXT: s_setpc_b64 s[30:31]
33 ; VI-LABEL: mixlo_simple:
35 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; VI-NEXT: v_mac_f32_e32 v2, v0, v1
37 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
38 ; VI-NEXT: s_setpc_b64 s[30:31]
40 ; SDAG-CI-LABEL: mixlo_simple:
42 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
44 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
45 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
46 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
48 ; GISEL-CI-LABEL: mixlo_simple:
50 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
52 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
53 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
54 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
55 %cvt.result = fptrunc float %result to half
59 define half @mixlo_simpl_no_flush(float %src0, float %src1, float %src2) {
60 ; GFX1100-LABEL: mixlo_simpl_no_flush:
62 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
64 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
66 ; GFX900-LABEL: mixlo_simpl_no_flush:
68 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
70 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
71 ; GFX900-NEXT: s_setpc_b64 s[30:31]
73 ; GFX906-LABEL: mixlo_simpl_no_flush:
75 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
77 ; GFX906-NEXT: s_setpc_b64 s[30:31]
79 ; VI-LABEL: mixlo_simpl_no_flush:
81 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
83 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
84 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
85 ; VI-NEXT: s_setpc_b64 s[30:31]
87 ; SDAG-CI-LABEL: mixlo_simpl_no_flush:
89 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
91 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
92 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
93 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
95 ; GISEL-CI-LABEL: mixlo_simpl_no_flush:
97 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
99 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
100 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
101 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
102 %cvt.result = fptrunc float %result to half
106 define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
107 ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
109 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
111 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
113 ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
115 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
117 ; GFX900-NEXT: s_setpc_b64 s[30:31]
119 ; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
121 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
123 ; GFX906-NEXT: s_setpc_b64 s[30:31]
125 ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
127 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
129 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
130 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
131 ; VI-NEXT: v_mac_f32_e32 v2, v0, v1
132 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
133 ; VI-NEXT: s_setpc_b64 s[30:31]
135 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
137 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
139 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
140 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
141 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
143 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
145 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
147 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
148 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
149 ; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
150 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
151 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
152 %src0.ext = fpext half %src0 to float
153 %src1.ext = fpext half %src1 to float
154 %src2.ext = fpext half %src2 to float
155 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
156 %cvt.result = fptrunc float %result to half
160 define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush(half %src0, half %src1, half %src2) {
161 ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
163 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
165 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
167 ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
169 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
171 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
172 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
173 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
174 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
175 ; GFX900-NEXT: s_setpc_b64 s[30:31]
177 ; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
179 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
181 ; GFX906-NEXT: s_setpc_b64 s[30:31]
183 ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
185 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
187 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
188 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
189 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
190 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
191 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
192 ; VI-NEXT: s_setpc_b64 s[30:31]
194 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
196 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
198 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
199 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
200 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
202 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush:
204 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
206 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
207 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
208 ; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
209 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
210 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
211 %src0.ext = fpext half %src0 to float
212 %src1.ext = fpext half %src1 to float
213 %src2.ext = fpext half %src2 to float
214 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
215 %cvt.result = fptrunc float %result to half
219 define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
220 ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
222 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
224 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
226 ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
228 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
230 ; GFX900-NEXT: s_setpc_b64 s[30:31]
232 ; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
234 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
236 ; GFX906-NEXT: s_setpc_b64 s[30:31]
238 ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
240 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
242 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
243 ; VI-NEXT: v_mac_f32_e32 v2, v0, v1
244 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
245 ; VI-NEXT: s_setpc_b64 s[30:31]
247 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
249 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
251 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
252 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
253 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
255 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
257 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
258 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
259 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
260 ; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
261 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
262 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
263 %src0.ext = fpext half %src0 to float
264 %src1.ext = fpext half %src1 to float
265 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
266 %cvt.result = fptrunc float %result to half
270 define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
271 ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
273 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
275 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
277 ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
279 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
281 ; GFX900-NEXT: s_setpc_b64 s[30:31]
283 ; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
285 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
287 ; GFX906-NEXT: s_setpc_b64 s[30:31]
289 ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
291 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
293 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
294 ; VI-NEXT: v_mac_f32_e32 v2, v0, v1
295 ; VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
296 ; VI-NEXT: s_setpc_b64 s[30:31]
298 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
300 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
302 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
303 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
304 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
306 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
308 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
310 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
311 ; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
312 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
313 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0
314 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
315 ; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1
316 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
317 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0
318 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
319 ; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1
320 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
321 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
322 %src0.ext = fpext half %src0 to float
323 %src1.ext = fpext half %src1 to float
324 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
325 %cvt.result = fptrunc float %result to half
326 %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
327 %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
331 define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
332 ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
334 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
336 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
337 ; GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
338 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
340 ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
342 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
344 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
345 ; GFX900-NEXT: s_setpc_b64 s[30:31]
347 ; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
349 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
351 ; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
352 ; GFX906-NEXT: s_setpc_b64 s[30:31]
354 ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
356 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
358 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
359 ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
360 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
361 ; VI-NEXT: s_setpc_b64 s[30:31]
363 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
365 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
367 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
368 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
369 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
371 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
373 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
375 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
376 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
377 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
378 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
379 %src0.ext = fpext half %src0 to float
380 %src1.ext = fpext half %src1 to float
381 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
382 %max = call float @llvm.maxnum.f32(float %result, float 0.0)
383 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
384 %cvt.result = fptrunc float %clamp to half
388 ; FIXME(DAG): Should abe able to avoid extra register because first
389 ; operation only clobbers relevant lane.
391 define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
392 ; GFX1100-LABEL: v_mad_mix_v2f32:
394 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
396 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
397 ; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
398 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
399 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
401 ; GFX900-LABEL: v_mad_mix_v2f32:
403 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
405 ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
406 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
407 ; GFX900-NEXT: s_setpc_b64 s[30:31]
409 ; GFX906-LABEL: v_mad_mix_v2f32:
411 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
413 ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
414 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
415 ; GFX906-NEXT: s_setpc_b64 s[30:31]
417 ; SDAG-VI-LABEL: v_mad_mix_v2f32:
419 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
421 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
422 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
423 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
424 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
425 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
426 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
427 ; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
428 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
429 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2
430 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
431 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
433 ; SDAG-CI-LABEL: v_mad_mix_v2f32:
435 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
437 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
438 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
439 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
440 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
441 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
442 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
443 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
444 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
445 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
446 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
447 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
448 ; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
449 ; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
450 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
451 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
452 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
453 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
454 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
456 ; GISEL-VI-LABEL: v_mad_mix_v2f32:
458 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
460 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
461 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
462 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
463 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
464 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
465 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
466 ; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
467 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
468 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
469 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
470 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
472 ; GISEL-CI-LABEL: v_mad_mix_v2f32:
474 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
476 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
477 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
478 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
479 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
480 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
481 ; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
482 ; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
483 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
484 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
485 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
486 %src0.ext = fpext <2 x half> %src0 to <2 x float>
487 %src1.ext = fpext <2 x half> %src1 to <2 x float>
488 %src2.ext = fpext <2 x half> %src2 to <2 x float>
489 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
490 %cvt.result = fptrunc <2 x float> %result to <2 x half>
491 ret <2 x half> %cvt.result
494 define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
495 ; GFX1100-LABEL: v_mad_mix_v3f32:
497 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
499 ; GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
500 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
501 ; GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
502 ; GFX1100-NEXT: v_mov_b32_e32 v0, v6
503 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
505 ; SDAG-GFX900-LABEL: v_mad_mix_v3f32:
506 ; SDAG-GFX900: ; %bb.0:
507 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
509 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
510 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
511 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
512 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
514 ; SDAG-GFX906-LABEL: v_mad_mix_v3f32:
515 ; SDAG-GFX906: ; %bb.0:
516 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
518 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
519 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
520 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
521 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
523 ; SDAG-VI-LABEL: v_mad_mix_v3f32:
525 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
527 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
528 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
529 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
530 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
531 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
532 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
533 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
534 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
535 ; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7
536 ; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
537 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
538 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4
539 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
540 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
541 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
542 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
544 ; SDAG-CI-LABEL: v_mad_mix_v3f32:
546 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
548 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
549 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
550 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
551 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
552 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
553 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
554 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
555 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
556 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
557 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
558 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
559 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
560 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
561 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
562 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
563 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
564 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
565 ; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5
566 ; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4
567 ; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3
568 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
569 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
570 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
571 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
572 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
573 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
574 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
576 ; GISEL-GFX900-LABEL: v_mad_mix_v3f32:
577 ; GISEL-GFX900: ; %bb.0:
578 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
580 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
581 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
582 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
583 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
585 ; GISEL-GFX906-LABEL: v_mad_mix_v3f32:
586 ; GISEL-GFX906: ; %bb.0:
587 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
589 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
590 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
591 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
592 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
594 ; GISEL-VI-LABEL: v_mad_mix_v3f32:
596 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
598 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
599 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
600 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
601 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
602 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
603 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
604 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
605 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
606 ; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
607 ; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
608 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v8
609 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
610 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
611 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
612 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v2
613 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
615 ; GISEL-CI-LABEL: v_mad_mix_v3f32:
617 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
619 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
620 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
621 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
622 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
623 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
624 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
625 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
626 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
627 ; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3
628 ; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4
629 ; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5
630 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
631 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
632 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
633 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
634 %src0.ext = fpext <3 x half> %src0 to <3 x float>
635 %src1.ext = fpext <3 x half> %src1 to <3 x float>
636 %src2.ext = fpext <3 x half> %src2 to <3 x float>
637 %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
638 %cvt.result = fptrunc <3 x float> %result to <3 x half>
639 ret <3 x half> %cvt.result
642 define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
643 ; GFX1100-LABEL: v_mad_mix_v4f32:
645 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
647 ; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
648 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
649 ; GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
650 ; GFX1100-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
651 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
652 ; GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
653 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
655 ; SDAG-GFX900-LABEL: v_mad_mix_v4f32:
656 ; SDAG-GFX900: ; %bb.0:
657 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
659 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
660 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
661 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
662 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v7
663 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v6
664 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
666 ; SDAG-GFX906-LABEL: v_mad_mix_v4f32:
667 ; SDAG-GFX906: ; %bb.0:
668 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
670 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
671 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
672 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
673 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v7
674 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v6
675 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
677 ; SDAG-VI-LABEL: v_mad_mix_v4f32:
679 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
681 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
682 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
683 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
684 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
685 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
686 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
687 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
688 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
689 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
690 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
691 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
692 ; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9
693 ; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8
694 ; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
695 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
696 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
697 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
698 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4
699 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v3, v5
700 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
701 ; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1
702 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
704 ; SDAG-CI-LABEL: v_mad_mix_v4f32:
706 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
708 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
709 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
710 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
711 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
712 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
713 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
714 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
715 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
716 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
717 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
718 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
719 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
720 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
721 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
722 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
723 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
724 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
725 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
726 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
727 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
728 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
729 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
730 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
731 ; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7
732 ; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6
733 ; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5
734 ; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4
735 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
736 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
737 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
738 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
739 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
740 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
741 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
742 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
743 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
745 ; GISEL-GFX900-LABEL: v_mad_mix_v4f32:
746 ; GISEL-GFX900: ; %bb.0:
747 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
749 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
750 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
751 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
752 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
753 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v7
754 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
756 ; GISEL-GFX906-LABEL: v_mad_mix_v4f32:
757 ; GISEL-GFX906: ; %bb.0:
758 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
760 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
761 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
762 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
763 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
764 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v7
765 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
767 ; GISEL-VI-LABEL: v_mad_mix_v4f32:
769 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
770 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
771 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
772 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
773 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
774 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
775 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
776 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
777 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
778 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
779 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
780 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
781 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
782 ; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
783 ; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
784 ; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
785 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
786 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v10
787 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
788 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v11
789 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
790 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
791 ; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v3
792 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
794 ; GISEL-CI-LABEL: v_mad_mix_v4f32:
796 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
798 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
799 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
800 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
801 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
802 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
803 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
804 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
805 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
806 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
807 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
808 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
809 ; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4
810 ; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5
811 ; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6
812 ; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7
813 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
814 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
815 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
816 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
817 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
818 %src0.ext = fpext <4 x half> %src0 to <4 x float>
819 %src1.ext = fpext <4 x half> %src1 to <4 x float>
820 %src2.ext = fpext <4 x half> %src2 to <4 x float>
821 %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
822 %cvt.result = fptrunc <4 x float> %result to <4 x half>
823 ret <4 x half> %cvt.result
826 ; FIXME (DAG): Fold clamp
828 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
829 ; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
831 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
833 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
834 ; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
835 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
836 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
838 ; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
840 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
842 ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
843 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
844 ; GFX900-NEXT: s_setpc_b64 s[30:31]
846 ; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt:
848 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849 ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
850 ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
851 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
852 ; GFX906-NEXT: s_setpc_b64 s[30:31]
854 ; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
856 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
857 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
858 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
859 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
860 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
861 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
862 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
863 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
864 ; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
865 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
866 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
867 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
868 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
870 ; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
872 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
874 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
875 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
876 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
877 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
878 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
879 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
880 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
881 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
882 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
883 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
884 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
885 ; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
886 ; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
887 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
888 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
889 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
890 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
891 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
893 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
895 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
897 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
898 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
899 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
900 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
901 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
902 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
903 ; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
904 ; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v5 clamp
905 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
906 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
907 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
909 ; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
911 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
913 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
914 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
915 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
916 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
917 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
918 ; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
919 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
920 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
921 ; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
922 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
923 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
924 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
925 ; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
926 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
927 ; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
928 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
929 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
930 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
931 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
932 ; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v2
933 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
934 ; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
935 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
936 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
937 %src0.ext = fpext <2 x half> %src0 to <2 x float>
938 %src1.ext = fpext <2 x half> %src1 to <2 x float>
939 %src2.ext = fpext <2 x half> %src2 to <2 x float>
940 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
941 %cvt.result = fptrunc <2 x float> %result to <2 x half>
942 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %cvt.result, <2 x half> zeroinitializer)
943 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
944 ret <2 x half> %clamp
947 ; FIXME (DAG): Should be packed into 2 registers per argument?
948 ; FIXME (GIsel): V_PK_MAX clamp could be folded into mixlo
950 define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
951 ; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
952 ; SDAG-GFX1100: ; %bb.0:
953 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
955 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
956 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
957 ; SDAG-GFX1100-NEXT: v_pack_b32_f16 v1, v1, 0
958 ; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
959 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
960 ; SDAG-GFX1100-NEXT: v_pk_max_f16 v1, v1, v1 clamp
961 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v3
962 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
964 ; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
965 ; SDAG-GFX900: ; %bb.0:
966 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
967 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
968 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
969 ; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
970 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
971 ; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
972 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
973 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
975 ; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
976 ; SDAG-GFX906: ; %bb.0:
977 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
979 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
980 ; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
981 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
982 ; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
983 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
984 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
986 ; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
988 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
990 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
991 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
992 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
993 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
994 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
995 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
996 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
997 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
998 ; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7
999 ; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
1000 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1001 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
1002 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
1003 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
1004 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
1005 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1007 ; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1009 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
1011 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1012 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1013 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1014 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1015 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1016 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1017 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1018 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1019 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1020 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1021 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1022 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1023 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1024 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1025 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1026 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1027 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1028 ; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3
1029 ; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4
1030 ; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5
1031 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
1032 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
1033 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
1034 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
1035 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
1036 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp
1037 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1039 ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1040 ; GISEL-GFX1100: ; %bb.0:
1041 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1042 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1043 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1044 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1045 ; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1046 ; GISEL-GFX1100-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1047 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
1048 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
1049 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1051 ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1052 ; GISEL-GFX900: ; %bb.0:
1053 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1055 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1056 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1057 ; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1058 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
1059 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1061 ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1062 ; GISEL-GFX906: ; %bb.0:
1063 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1064 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1065 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1066 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1067 ; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1068 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
1069 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1071 ; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1072 ; GISEL-VI: ; %bb.0:
1073 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
1075 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1076 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
1077 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1078 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
1079 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1080 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1081 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
1082 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
1083 ; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
1084 ; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
1085 ; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v8 clamp
1086 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v2, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1087 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
1088 ; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
1089 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v2
1090 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1092 ; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1093 ; GISEL-CI: ; %bb.0:
1094 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1095 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1096 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1097 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1098 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1099 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1100 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1101 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1102 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1103 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1104 ; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3
1105 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
1106 ; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4
1107 ; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5
1108 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
1109 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v8
1110 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1111 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
1112 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1113 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1114 ; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
1115 ; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
1116 ; GISEL-CI-NEXT: v_max_f32_e32 v2, v3, v2
1117 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1118 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1119 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1120 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, 1.0
1121 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1122 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1123 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1124 ; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v3
1125 ; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v3
1126 ; GISEL-CI-NEXT: v_min_f32_e32 v2, v2, v3
1127 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1128 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1129 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1130 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1131 %src0.ext = fpext <3 x half> %src0 to <3 x float>
1132 %src1.ext = fpext <3 x half> %src1 to <3 x float>
1133 %src2.ext = fpext <3 x half> %src2 to <3 x float>
1134 %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
1135 %cvt.result = fptrunc <3 x float> %result to <3 x half>
1136 %max = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %cvt.result, <3 x half> zeroinitializer)
1137 %clamp = call <3 x half> @llvm.minnum.v3f16(<3 x half> %max, <3 x half> <half 1.0, half 1.0, half 1.0>)
1138 ret <3 x half> %clamp
1141 define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
1142 ; GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1144 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1146 ; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1147 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1148 ; GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1149 ; GFX1100-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1150 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1151 ; GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1152 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1154 ; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157 ; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1158 ; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1159 ; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1160 ; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1161 ; GFX900-NEXT: v_mov_b32_e32 v0, v6
1162 ; GFX900-NEXT: v_mov_b32_e32 v1, v2
1163 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1167 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1169 ; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1170 ; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1171 ; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1172 ; GFX906-NEXT: v_mov_b32_e32 v0, v6
1173 ; GFX906-NEXT: v_mov_b32_e32 v1, v2
1174 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1176 ; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1178 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1180 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1181 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1182 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1183 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1184 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1185 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1186 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
1187 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1188 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1189 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
1190 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
1191 ; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9
1192 ; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8
1193 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
1194 ; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
1195 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1196 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1197 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
1198 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp
1199 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
1200 ; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1
1201 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1203 ; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1205 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
1207 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1208 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
1209 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1210 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1211 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
1212 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1213 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1214 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
1215 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1216 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1217 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1218 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
1219 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1220 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
1221 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1222 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
1223 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1224 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1225 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1226 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1227 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1228 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1229 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1230 ; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4
1231 ; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5
1232 ; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6
1233 ; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7
1234 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
1235 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
1236 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
1237 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
1238 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
1239 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
1240 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp
1241 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v3, v3 clamp
1242 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1244 ; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1245 ; GISEL-VI: ; %bb.0:
1246 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
1248 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1249 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
1250 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1251 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
1252 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1253 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
1254 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1255 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
1256 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1257 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
1258 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1259 ; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
1260 ; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
1261 ; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
1262 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
1263 ; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v10 clamp
1264 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1265 ; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v11 clamp
1266 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v3, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1267 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
1268 ; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v3
1269 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1271 ; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1272 ; GISEL-CI: ; %bb.0:
1273 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1274 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1275 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1276 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1277 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1278 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1279 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1280 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1281 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1282 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1283 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
1284 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
1285 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
1286 ; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4
1287 ; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5
1288 ; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6
1289 ; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7
1290 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
1291 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
1292 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v10
1293 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v4, v11
1294 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1295 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
1296 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1297 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1298 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1299 ; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
1300 ; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
1301 ; GISEL-CI-NEXT: v_max_f32_e32 v3, v3, v2
1302 ; GISEL-CI-NEXT: v_max_f32_e32 v2, v4, v2
1303 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1304 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1305 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1306 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1307 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1308 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, 1.0
1309 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1310 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1311 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v2
1312 ; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v5
1313 ; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v5
1314 ; GISEL-CI-NEXT: v_min_f32_e32 v2, v3, v5
1315 ; GISEL-CI-NEXT: v_min_f32_e32 v3, v4, v5
1316 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1317 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1318 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1319 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1320 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1321 %src0.ext = fpext <4 x half> %src0 to <4 x float>
1322 %src1.ext = fpext <4 x half> %src1 to <4 x float>
1323 %src2.ext = fpext <4 x half> %src2 to <4 x float>
1324 %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
1325 %cvt.result = fptrunc <4 x float> %result to <4 x half>
1326 %max = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %cvt.result, <4 x half> zeroinitializer)
1327 %clamp = call <4 x half> @llvm.minnum.v4f16(<4 x half> %max, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>)
1328 ret <4 x half> %clamp
1331 ; FIXME (GISel): Packed Vectors handling isn't great for now, so we don't end up with
1332 ; a build_vector to select the mixhi. Issue is more specifically with how insert_vector_elt is being
1333 ; legalized (bitwise ops instead of shuffle/build_vector for instance).
1334 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1335 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1336 ; SDAG-GFX1100: ; %bb.0:
1337 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1339 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1340 ; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1341 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v3
1342 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1344 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1345 ; SDAG-GFX900: ; %bb.0:
1346 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1348 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1349 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
1350 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1352 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1353 ; SDAG-GFX906: ; %bb.0:
1354 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1356 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1357 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
1358 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1360 ; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1362 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1363 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1364 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1365 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1366 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1367 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1368 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1369 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
1370 ; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
1371 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1372 ; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
1373 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
1374 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1376 ; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1378 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1380 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1381 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1382 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1383 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1384 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1385 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1386 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1387 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1388 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1389 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1390 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1391 ; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
1392 ; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
1393 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
1394 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
1395 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
1396 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1397 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1399 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1400 ; GISEL-GFX1100: ; %bb.0:
1401 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1403 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1404 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, v3
1405 ; GISEL-GFX1100-NEXT: v_max_f16_e64 v3, v3, v3 clamp
1406 ; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1407 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1408 ; GISEL-GFX1100-NEXT: v_and_b32_e32 v0, 0xffff, v3
1409 ; GISEL-GFX1100-NEXT: v_and_or_b32 v0, 0xffff0000, v4, v0
1410 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1412 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1413 ; GISEL-GFX900: ; %bb.0:
1414 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1416 ; GISEL-GFX900-NEXT: v_max_f16_e64 v4, v3, v3 clamp
1417 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1418 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 0xffff0000
1419 ; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v0, v4
1420 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1422 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1423 ; GISEL-GFX906: ; %bb.0:
1424 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1426 ; GISEL-GFX906-NEXT: v_max_f16_e64 v4, v3, v3 clamp
1427 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1428 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 0xffff0000
1429 ; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v0, v4
1430 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1432 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1433 ; GISEL-VI: ; %bb.0:
1434 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
1436 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1437 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
1438 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1439 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
1440 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1441 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
1442 ; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
1443 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
1444 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1445 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
1446 ; GISEL-VI-NEXT: v_max_f16_e64 v1, v0, v0 clamp
1447 ; GISEL-VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1448 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
1449 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1451 ; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
1452 ; GISEL-CI: ; %bb.0:
1453 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1455 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1456 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1457 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1458 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1459 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1460 ; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
1461 ; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
1462 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v5
1463 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v4
1464 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
1465 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1466 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
1467 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
1468 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1469 ; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
1470 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1471 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
1472 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1473 ; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
1474 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1475 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
1476 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1477 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1478 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1479 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1480 %src2.ext = fpext <2 x half> %src2 to <2 x float>
1481 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
1482 %cvt.result = fptrunc <2 x float> %result to <2 x half>
1483 %cvt.lo = extractelement <2 x half> %cvt.result, i32 0
1484 %max.lo = call half @llvm.maxnum.f16(half %cvt.lo, half 0.0)
1485 %clamp.lo = call half @llvm.minnum.f16(half %max.lo, half 1.0)
1486 %insert = insertelement <2 x half> %cvt.result, half %clamp.lo, i32 0
1487 ret <2 x half> %insert
1490 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1491 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1492 ; SDAG-GFX1100: ; %bb.0:
1493 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1495 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1496 ; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1497 ; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v3
1498 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1500 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1501 ; SDAG-GFX900: ; %bb.0:
1502 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1504 ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1505 ; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
1506 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1508 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1509 ; SDAG-GFX906: ; %bb.0:
1510 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511 ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1512 ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1513 ; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
1514 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1516 ; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1518 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1520 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1521 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1522 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1523 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1524 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1525 ; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
1526 ; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
1527 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1528 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2
1529 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
1530 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1532 ; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1534 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1536 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1537 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1538 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1539 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1540 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1541 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1542 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1543 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1544 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1545 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1546 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1547 ; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
1548 ; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
1549 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
1550 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
1551 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1552 ; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
1553 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1555 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1556 ; GISEL-GFX1100: ; %bb.0:
1557 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1558 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1559 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1]
1560 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1561 ; GISEL-GFX1100-NEXT: v_and_b32_e32 v3, 0xffff, v3
1562 ; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1563 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1564 ; GISEL-GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v3
1565 ; GISEL-GFX1100-NEXT: v_and_or_b32 v0, 0xffff, v4, v0
1566 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1568 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1569 ; GISEL-GFX900: ; %bb.0:
1570 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1572 ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1573 ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1574 ; GISEL-GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v4
1575 ; GISEL-GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1576 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff
1577 ; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0
1578 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1580 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1581 ; GISEL-GFX906: ; %bb.0:
1582 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
1584 ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1585 ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1586 ; GISEL-GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v4
1587 ; GISEL-GFX906-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1588 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1589 ; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0
1590 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1592 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1593 ; GISEL-VI: ; %bb.0:
1594 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
1596 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1597 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
1598 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1599 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
1600 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1601 ; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
1602 ; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
1603 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
1604 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1605 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
1606 ; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1607 ; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1608 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1610 ; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
1611 ; GISEL-CI: ; %bb.0:
1612 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1614 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1615 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1616 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1617 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1618 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1619 ; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
1620 ; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
1621 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v5
1622 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v4
1623 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
1624 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1625 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
1626 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1627 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1628 ; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
1629 ; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
1630 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1631 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
1632 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1633 ; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
1634 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1635 ; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1636 ; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
1637 ; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1638 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1639 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1640 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1641 %src2.ext = fpext <2 x half> %src2 to <2 x float>
1642 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
1643 %cvt.result = fptrunc <2 x float> %result to <2 x half>
1644 %cvt.hi = extractelement <2 x half> %cvt.result, i32 1
1645 %max.hi = call half @llvm.maxnum.f16(half %cvt.hi, half 0.0)
1646 %clamp.hi = call half @llvm.minnum.f16(half %max.hi, half 1.0)
1647 %insert = insertelement <2 x half> %cvt.result, half %clamp.hi, i32 1
1648 ret <2 x half> %insert
1651 ; FIXME (DAG): Should be able to use mixlo/mixhi
1653 define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1654 ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
1655 ; SDAG-GFX1100: ; %bb.0:
1656 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1657 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1658 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1659 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1660 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v3
1661 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
1662 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1663 ; SDAG-GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1
1664 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1666 ; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
1667 ; SDAG-GFX900: ; %bb.0:
1668 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1669 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1670 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1671 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3
1672 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
1673 ; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v1
1674 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1676 ; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
1677 ; SDAG-GFX906: ; %bb.0:
1678 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1680 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1681 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3
1682 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
1683 ; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v1
1684 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1686 ; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_precvt:
1688 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1690 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1691 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1692 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1693 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1694 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1695 ; SDAG-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
1696 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1697 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1698 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
1699 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
1700 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1702 ; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
1704 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1705 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1706 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1707 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1708 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1709 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1710 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1711 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1712 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1713 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1714 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1715 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1716 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1717 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
1718 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
1719 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1720 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1721 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1722 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1723 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1725 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
1726 ; GISEL-GFX1100: ; %bb.0:
1727 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1728 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1729 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1730 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1731 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v3
1732 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
1733 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1734 ; GISEL-GFX1100-NEXT: v_pack_b32_f16 v0, v1, v0
1735 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1737 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
1738 ; GISEL-GFX900: ; %bb.0:
1739 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1740 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1741 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1742 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3
1743 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
1744 ; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
1745 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1747 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
1748 ; GISEL-GFX906: ; %bb.0:
1749 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1750 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
1751 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1752 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3
1753 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
1754 ; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v1, v0
1755 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1757 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_precvt:
1758 ; GISEL-VI: ; %bb.0:
1759 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
1761 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1762 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
1763 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1764 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
1765 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1766 ; GISEL-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
1767 ; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
1768 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v3
1769 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1770 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v1, v0
1771 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1773 ; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
1774 ; GISEL-CI: ; %bb.0:
1775 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1777 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1778 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1779 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1780 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1781 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1782 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
1783 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
1784 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1785 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1786 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1787 %src0.ext = fpext <2 x half> %src0 to <2 x float>
1788 %src1.ext = fpext <2 x half> %src1 to <2 x float>
1789 %src2.ext = fpext <2 x half> %src2 to <2 x float>
1790 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
1791 %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %result, <2 x float> zeroinitializer)
1792 %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
1793 %cvt.result = fptrunc <2 x float> %clamp to <2 x half>
1794 ret <2 x half> %cvt.result
1797 ; FIXME (DAG): Handling undef 4th component
1799 define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
1800 ; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
1801 ; SDAG-GFX1100: ; %bb.0:
1802 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1803 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1804 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1805 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1806 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1807 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v2, v6
1808 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
1809 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1810 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v1
1811 ; SDAG-GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2
1812 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1814 ; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
1815 ; SDAG-GFX900: ; %bb.0:
1816 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1818 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1819 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
1820 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
1821 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1822 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
1823 ; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v2
1824 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1826 ; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
1827 ; SDAG-GFX906: ; %bb.0:
1828 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1829 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1830 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1831 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
1832 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
1833 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1834 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
1835 ; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v2
1836 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1838 ; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_precvt:
1840 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1842 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1843 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1844 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
1845 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1846 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
1847 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1848 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
1849 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
1850 ; SDAG-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
1851 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
1852 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1853 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
1854 ; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
1855 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
1856 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v2
1857 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
1859 ; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
1861 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
1863 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1864 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1865 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1866 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1867 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1868 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1869 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1870 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1871 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1872 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1873 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1874 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1875 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1876 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1877 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1878 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1879 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1880 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp
1881 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp
1882 ; SDAG-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp
1883 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1884 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1885 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1886 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1887 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1888 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1889 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
1891 ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
1892 ; GISEL-GFX1100: ; %bb.0:
1893 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1894 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1895 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1896 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1897 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1898 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v2, v6
1899 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
1900 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1901 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v1
1902 ; GISEL-GFX1100-NEXT: v_pack_b32_f16 v0, v2, v0
1903 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1905 ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
1906 ; GISEL-GFX900: ; %bb.0:
1907 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1908 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1909 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1910 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
1911 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
1912 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1913 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
1914 ; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
1915 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1917 ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
1918 ; GISEL-GFX906: ; %bb.0:
1919 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1920 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1921 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1922 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
1923 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
1924 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1925 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
1926 ; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v2, v0
1927 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1929 ; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_precvt:
1930 ; GISEL-VI: ; %bb.0:
1931 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1932 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
1933 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1934 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
1935 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1936 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
1937 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1938 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
1939 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
1940 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
1941 ; GISEL-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
1942 ; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
1943 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v6
1944 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
1945 ; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
1946 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
1947 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v2, v0
1948 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
1950 ; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
1951 ; GISEL-CI: ; %bb.0:
1952 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
1954 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
1955 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1956 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1957 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1958 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1959 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1960 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1961 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1962 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp
1963 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp
1964 ; GISEL-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp
1965 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
1966 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
1967 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1968 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
1969 %src0.ext = fpext <3 x half> %src0 to <3 x float>
1970 %src1.ext = fpext <3 x half> %src1 to <3 x float>
1971 %src2.ext = fpext <3 x half> %src2 to <3 x float>
1972 %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
1973 %max = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %result, <3 x float> zeroinitializer)
1974 %clamp = call <3 x float> @llvm.minnum.v3f32(<3 x float> %max, <3 x float> <float 1.0, float 1.0, float 1.0>)
1975 %cvt.result = fptrunc <3 x float> %clamp to <3 x half>
1976 ret <3 x half> %cvt.result
1979 define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
1980 ; SDAG-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
1981 ; SDAG-GFX1100: ; %bb.0:
1982 ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1984 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1985 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1986 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1987 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1988 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v2, v6
1989 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v3, v7
1990 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1991 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
1992 ; SDAG-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v1
1993 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1994 ; SDAG-GFX1100-NEXT: v_pack_b32_f16 v0, v0, v3
1995 ; SDAG-GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
1996 ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
1998 ; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
1999 ; SDAG-GFX900: ; %bb.0:
2000 ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2001 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2002 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
2003 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2004 ; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
2005 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
2006 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3
2007 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
2008 ; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
2009 ; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v3
2010 ; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, v2
2011 ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
2013 ; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
2014 ; SDAG-GFX906: ; %bb.0:
2015 ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2017 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
2018 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2019 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
2020 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
2021 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3
2022 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
2023 ; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
2024 ; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v3
2025 ; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, v2
2026 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
2028 ; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_precvt:
2030 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2031 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2032 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2033 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
2034 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
2035 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2036 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2037 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
2038 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
2039 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2040 ; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2041 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
2042 ; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
2043 ; SDAG-VI-NEXT: v_mad_f32 v7, v7, v9, v10 clamp
2044 ; SDAG-VI-NEXT: v_mad_f32 v6, v6, v8, v11 clamp
2045 ; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
2046 ; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
2047 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
2048 ; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
2049 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
2050 ; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
2051 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v3
2052 ; SDAG-VI-NEXT: v_or_b32_e32 v1, v1, v2
2053 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
2055 ; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
2057 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2058 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
2059 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2060 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
2061 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2062 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2063 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
2064 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2065 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2066 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
2067 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2068 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2069 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
2070 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
2071 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2072 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
2073 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2074 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
2075 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2076 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
2077 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2078 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2079 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2080 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2081 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
2082 ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp
2083 ; SDAG-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp
2084 ; SDAG-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp
2085 ; SDAG-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp
2086 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2087 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
2088 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2089 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2090 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2091 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2092 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2093 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
2094 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2096 ; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
2097 ; GISEL-GFX1100: ; %bb.0:
2098 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2099 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
2100 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2101 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
2102 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2103 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2104 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v3, v6
2105 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
2106 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2107 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v2, v2
2108 ; GISEL-GFX1100-NEXT: v_cvt_f16_f32_e32 v1, v1
2109 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2110 ; GISEL-GFX1100-NEXT: v_pack_b32_f16 v0, v3, v0
2111 ; GISEL-GFX1100-NEXT: v_pack_b32_f16 v1, v2, v1
2112 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
2114 ; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
2115 ; GISEL-GFX900: ; %bb.0:
2116 ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2117 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
2118 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2119 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
2120 ; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2121 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v6
2122 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
2123 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
2124 ; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
2125 ; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v3, v0
2126 ; GISEL-GFX900-NEXT: v_pack_b32_f16 v1, v2, v1
2127 ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
2129 ; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
2130 ; GISEL-GFX906: ; %bb.0:
2131 ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2132 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
2133 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2134 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
2135 ; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
2136 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v6
2137 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
2138 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v2
2139 ; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
2140 ; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v3, v0
2141 ; GISEL-GFX906-NEXT: v_pack_b32_f16 v1, v2, v1
2142 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
2144 ; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_precvt:
2145 ; GISEL-VI: ; %bb.0:
2146 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2147 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
2148 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2149 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
2150 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2151 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
2152 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2153 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
2154 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2155 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
2156 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2157 ; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
2158 ; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2159 ; GISEL-VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp
2160 ; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
2161 ; GISEL-VI-NEXT: v_mad_f32 v2, v7, v9, v11 clamp
2162 ; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
2163 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v6
2164 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
2165 ; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v2
2166 ; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
2167 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v3, v0
2168 ; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v1
2169 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
2171 ; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
2172 ; GISEL-CI: ; %bb.0:
2173 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2174 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2175 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2176 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2177 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
2178 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2179 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2180 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2181 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2182 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
2183 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
2184 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
2185 ; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
2186 ; GISEL-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp
2187 ; GISEL-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp
2188 ; GISEL-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp
2189 ; GISEL-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp
2190 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2191 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
2192 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2193 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2194 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2195 %src0.ext = fpext <4 x half> %src0 to <4 x float>
2196 %src1.ext = fpext <4 x half> %src1 to <4 x float>
2197 %src2.ext = fpext <4 x half> %src2 to <4 x float>
2198 %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
2199 %max = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %result, <4 x float> zeroinitializer)
2200 %clamp = call <4 x float> @llvm.minnum.v4f32(<4 x float> %max, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
2201 %cvt.result = fptrunc <4 x float> %clamp to <4 x half>
2202 ret <4 x half> %cvt.result
2205 define i32 @mixlo_zext(float %src0, float %src1, float %src2) #0 {
2206 ; GFX1100-LABEL: mixlo_zext:
2208 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2209 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
2210 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
2211 ; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff, v0
2212 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2214 ; GFX900-LABEL: mixlo_zext:
2216 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2
2218 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
2219 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2221 ; GFX906-LABEL: mixlo_zext:
2223 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2224 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
2225 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
2226 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2228 ; VI-LABEL: mixlo_zext:
2230 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2231 ; VI-NEXT: v_mac_f32_e32 v2, v0, v1
2232 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
2233 ; VI-NEXT: s_setpc_b64 s[30:31]
2235 ; SDAG-CI-LABEL: mixlo_zext:
2237 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2238 ; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
2239 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
2240 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2242 ; GISEL-CI-LABEL: mixlo_zext:
2243 ; GISEL-CI: ; %bb.0:
2244 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2245 ; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
2246 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
2247 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2248 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
2249 %cvt.result = fptrunc float %result to half
2250 %cvt.result.i16 = bitcast half %cvt.result to i16
2251 %cvt.result.i32 = zext i16 %cvt.result.i16 to i32
2252 ret i32 %cvt.result.i32
2255 define half @mixlo_fptrunc(float %a, float %b) #0 {
2256 ; GFX1100-LABEL: mixlo_fptrunc:
2257 ; GFX1100: ; %bb.0: ; %.entry
2258 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, 0
2260 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2262 ; GFX900-LABEL: mixlo_fptrunc:
2263 ; GFX900: ; %bb.0: ; %.entry
2264 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2265 ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, 0
2266 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2268 ; GFX906-LABEL: mixlo_fptrunc:
2269 ; GFX906: ; %bb.0: ; %.entry
2270 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2271 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, 0
2272 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2274 ; VI-LABEL: mixlo_fptrunc:
2275 ; VI: ; %bb.0: ; %.entry
2276 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2277 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
2278 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
2279 ; VI-NEXT: s_setpc_b64 s[30:31]
2281 ; SDAG-CI-LABEL: mixlo_fptrunc:
2282 ; SDAG-CI: ; %bb.0: ; %.entry
2283 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2285 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2286 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2287 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2289 ; GISEL-CI-LABEL: mixlo_fptrunc:
2290 ; GISEL-CI: ; %bb.0: ; %.entry
2291 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2293 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2294 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2296 %mul = fmul float %a, %b
2297 %trunc = fptrunc float %mul to half
2301 define half @mixlo_fptrunc_no_flush(float %a, float %b) {
2302 ; GFX1100-LABEL: mixlo_fptrunc_no_flush:
2303 ; GFX1100: ; %bb.0: ; %.entry
2304 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, 0
2306 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2308 ; GFX900-LABEL: mixlo_fptrunc_no_flush:
2309 ; GFX900: ; %bb.0: ; %.entry
2310 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2311 ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
2312 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
2313 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2315 ; GFX906-LABEL: mixlo_fptrunc_no_flush:
2316 ; GFX906: ; %bb.0: ; %.entry
2317 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2318 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, 0
2319 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2321 ; VI-LABEL: mixlo_fptrunc_no_flush:
2322 ; VI: ; %bb.0: ; %.entry
2323 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2324 ; VI-NEXT: v_mul_f32_e32 v0, v0, v1
2325 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
2326 ; VI-NEXT: s_setpc_b64 s[30:31]
2328 ; SDAG-CI-LABEL: mixlo_fptrunc_no_flush:
2329 ; SDAG-CI: ; %bb.0: ; %.entry
2330 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331 ; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2332 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2333 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2334 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2336 ; GISEL-CI-LABEL: mixlo_fptrunc_no_flush:
2337 ; GISEL-CI: ; %bb.0: ; %.entry
2338 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339 ; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
2340 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2341 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2343 %mul = fmul float %a, %b
2344 %trunc = fptrunc float %mul to half
2348 define half @mixlo_fptrunc_abs_src_mod(float %a, float %b) #0 {
2349 ; GFX1100-LABEL: mixlo_fptrunc_abs_src_mod:
2350 ; GFX1100: ; %bb.0: ; %.entry
2351 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2352 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, |v0|, v1, 0
2353 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2355 ; GFX900-LABEL: mixlo_fptrunc_abs_src_mod:
2356 ; GFX900: ; %bb.0: ; %.entry
2357 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358 ; GFX900-NEXT: v_mad_mixlo_f16 v0, |v0|, v1, 0
2359 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2361 ; GFX906-LABEL: mixlo_fptrunc_abs_src_mod:
2362 ; GFX906: ; %bb.0: ; %.entry
2363 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2364 ; GFX906-NEXT: v_fma_mixlo_f16 v0, |v0|, v1, 0
2365 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2367 ; VI-LABEL: mixlo_fptrunc_abs_src_mod:
2368 ; VI: ; %bb.0: ; %.entry
2369 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370 ; VI-NEXT: v_mul_f32_e64 v0, |v0|, v1
2371 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
2372 ; VI-NEXT: s_setpc_b64 s[30:31]
2374 ; SDAG-CI-LABEL: mixlo_fptrunc_abs_src_mod:
2375 ; SDAG-CI: ; %bb.0: ; %.entry
2376 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377 ; SDAG-CI-NEXT: v_mul_f32_e64 v0, |v0|, v1
2378 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2379 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2380 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2382 ; GISEL-CI-LABEL: mixlo_fptrunc_abs_src_mod:
2383 ; GISEL-CI: ; %bb.0: ; %.entry
2384 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385 ; GISEL-CI-NEXT: v_mul_f32_e64 v0, |v0|, v1
2386 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2387 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2389 %a.fabs = call float @llvm.fabs.f32(float %a)
2390 %mul = fmul float %a.fabs, %b
2391 %trunc = fptrunc float %mul to half
2395 define half @mixlo_fptrunc_neg_src_mod(float %a, float %b) #0 {
2396 ; GFX1100-LABEL: mixlo_fptrunc_neg_src_mod:
2397 ; GFX1100: ; %bb.0: ; %.entry
2398 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2399 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, -v0, v1, 0
2400 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2402 ; GFX900-LABEL: mixlo_fptrunc_neg_src_mod:
2403 ; GFX900: ; %bb.0: ; %.entry
2404 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405 ; GFX900-NEXT: v_mad_mixlo_f16 v0, -v0, v1, 0
2406 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2408 ; GFX906-LABEL: mixlo_fptrunc_neg_src_mod:
2409 ; GFX906: ; %bb.0: ; %.entry
2410 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411 ; GFX906-NEXT: v_fma_mixlo_f16 v0, -v0, v1, 0
2412 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2414 ; VI-LABEL: mixlo_fptrunc_neg_src_mod:
2415 ; VI: ; %bb.0: ; %.entry
2416 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2417 ; VI-NEXT: v_mul_f32_e64 v0, -v0, v1
2418 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
2419 ; VI-NEXT: s_setpc_b64 s[30:31]
2421 ; SDAG-CI-LABEL: mixlo_fptrunc_neg_src_mod:
2422 ; SDAG-CI: ; %bb.0: ; %.entry
2423 ; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2424 ; SDAG-CI-NEXT: v_mul_f32_e64 v0, -v0, v1
2425 ; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2426 ; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
2427 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
2429 ; GISEL-CI-LABEL: mixlo_fptrunc_neg_src_mod:
2430 ; GISEL-CI: ; %bb.0: ; %.entry
2431 ; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2432 ; GISEL-CI-NEXT: v_mul_f32_e64 v0, -v0, v1
2433 ; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
2434 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
2436 %a.fneg = fneg float %a
2437 %mul = fmul float %a.fneg, %b
2438 %trunc = fptrunc float %mul to half
2442 declare float @llvm.fabs.f32(float) #1
2444 declare half @llvm.minnum.f16(half, half) #1
2445 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
2446 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
2447 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1
2449 declare half @llvm.maxnum.f16(half, half) #1
2450 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
2451 declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
2452 declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1
2454 declare float @llvm.minnum.f32(float, float) #1
2455 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
2456 declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
2457 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
2459 declare float @llvm.maxnum.f32(float, float) #1
2460 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
2461 declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
2462 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
2464 declare float @llvm.fmuladd.f32(float, float, float) #1
2465 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
2466 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
2467 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
2469 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2470 attributes #1 = { nounwind readnone speculatable }