1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10,GFX10-SDAG %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10,GFX10-GISEL %s
6 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11,GFX11-SDAG %s
7 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11,GFX11-GISEL %s
9 ; Test fmul by power of 2 which is better emitted as ldexp
11 declare half @llvm.fabs.f16(half)
12 declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
13 declare float @llvm.fabs.f32(float)
14 declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
15 declare double @llvm.fabs.f64(double)
16 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
17 declare i32 @llvm.amdgcn.readfirstlane(i32)
; f32 multiply by 42.0, which is not a power of two. Every configuration is
; expected to emit a plain v_mul_f32 with the literal 0x42280000.
19 define float @v_mul_42_f32(float %x) {
20 ; GCN-LABEL: v_mul_42_f32:
22 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GCN-NEXT: v_mul_f32_e32 v0, 0x42280000, v0
24 ; GCN-NEXT: s_setpc_b64 s[30:31]
25 %mul = fmul float %x, 42.0
; f64 multiply by 42.0 (not a power of two). All configurations are expected to
; keep v_mul_f64: gfx900 first materializes the constant in a register pair,
; while gfx1030/gfx1100 pass 0x40450000 as a literal operand.
29 define double @v_mul_42_f64(double %x) {
30 ; GFX9-SDAG-LABEL: v_mul_42_f64:
32 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
34 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40450000
35 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
36 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
38 ; GFX9-GISEL-LABEL: v_mul_42_f64:
39 ; GFX9-GISEL: ; %bb.0:
40 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
42 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40450000
43 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
44 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
46 ; GFX1011-LABEL: v_mul_42_f64:
48 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX1011-NEXT: v_mul_f64 v[0:1], 0x40450000, v[0:1]
50 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
51 %mul = fmul double %x, 42.0
; f16 multiply by 42.0. Every configuration is expected to emit v_mul_f16 with
; the literal 0x5140.
55 define half @v_mul_42_f16(half %x) {
56 ; GCN-LABEL: v_mul_42_f16:
58 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5140, v0
60 ; GCN-NEXT: s_setpc_b64 s[30:31]
61 %mul = fmul half %x, 42.0
; <2 x half> multiply by a splat of 42.0. All configurations are expected to use
; v_pk_mul_f16; they differ only in how the 0x5140 constant is supplied
; (scalar register with op_sel_hi, a 0x51405140 vector register, or a literal).
65 define <2 x half> @v_mul_42_v2f16(<2 x half> %x) {
66 ; GFX9-SDAG-LABEL: v_mul_42_v2f16:
68 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5140
70 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
71 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
73 ; GFX9-GISEL-LABEL: v_mul_42_v2f16:
74 ; GFX9-GISEL: ; %bb.0:
75 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x51405140
77 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
78 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
80 ; GFX1011-LABEL: v_mul_42_v2f16:
82 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX1011-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
84 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
85 %mul = fmul <2 x half> %x, <half 42.0, half 42.0>
; f32 multiply by 2^-17 (0.00000762939453125). Every configuration is expected
; to emit v_mul_f32 with the literal 0x37000000.
90 define float @v_mul_0x1pn17_f32(float %x) {
91 ; GCN-LABEL: v_mul_0x1pn17_f32:
93 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GCN-NEXT: v_mul_f32_e32 v0, 0x37000000, v0
95 ; GCN-NEXT: s_setpc_b64 s[30:31]
96 %mul = fmul float %x, 0.00000762939453125
; f32 multiply by 2^-16 (0.0000152587890625). Every configuration is expected
; to emit v_mul_f32 with the literal 0x37800000.
101 define float @v_mul_0x1pn16_f32(float %x) {
102 ; GCN-LABEL: v_mul_0x1pn16_f32:
104 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GCN-NEXT: v_mul_f32_e32 v0, 0x37800000, v0
106 ; GCN-NEXT: s_setpc_b64 s[30:31]
107 %mul = fmul float %x, 0.0000152587890625
; f32 multiply by 2^-15 (0.000030517578125). Every configuration is expected
; to emit v_mul_f32 with the literal 0x38000000.
112 define float @v_mul_0x1pn15_f32(float %x) {
113 ; GCN-LABEL: v_mul_0x1pn15_f32:
115 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GCN-NEXT: v_mul_f32_e32 v0, 0x38000000, v0
117 ; GCN-NEXT: s_setpc_b64 s[30:31]
118 %mul = fmul float %x, 0.000030517578125
; f32 multiply by -256.0 (negated power of two). Every configuration is
; expected to emit v_mul_f32 with the literal 0xc3800000.
122 define float @v_mul_neg256_f32(float %x) {
123 ; GCN-LABEL: v_mul_neg256_f32:
125 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc3800000, v0
127 ; GCN-NEXT: s_setpc_b64 s[30:31]
128 %mul = fmul float %x, -256.0
; f32 multiply by -128.0. Every configuration is expected to emit v_mul_f32
; with the literal 0xc3000000.
132 define float @v_mul_neg128_f32(float %x) {
133 ; GCN-LABEL: v_mul_neg128_f32:
135 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc3000000, v0
137 ; GCN-NEXT: s_setpc_b64 s[30:31]
138 %mul = fmul float %x, -128.0
; f32 multiply by -64.0. Every configuration is expected to emit v_mul_f32
; with the literal 0xc2800000.
142 define float @v_mul_neg64_f32(float %x) {
143 ; GCN-LABEL: v_mul_neg64_f32:
145 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc2800000, v0
147 ; GCN-NEXT: s_setpc_b64 s[30:31]
148 %mul = fmul float %x, -64.0
; f32 multiply by -32.0. Every configuration is expected to emit v_mul_f32
; with the literal 0xc2000000.
152 define float @v_mul_neg32_f32(float %x) {
153 ; GCN-LABEL: v_mul_neg32_f32:
155 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc2000000, v0
157 ; GCN-NEXT: s_setpc_b64 s[30:31]
158 %mul = fmul float %x, -32.0
; f32 multiply by -16.0. Every configuration is expected to emit v_mul_f32
; with the literal 0xc1800000.
162 define float @v_mul_neg16_f32(float %x) {
163 ; GCN-LABEL: v_mul_neg16_f32:
165 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc1800000, v0
167 ; GCN-NEXT: s_setpc_b64 s[30:31]
168 %mul = fmul float %x, -16.0
; f32 multiply by -8.0. Every configuration is expected to emit v_mul_f32
; with the literal 0xc1000000.
172 define float @v_mul_neg8_f32(float %x) {
173 ; GCN-LABEL: v_mul_neg8_f32:
175 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GCN-NEXT: v_mul_f32_e32 v0, 0xc1000000, v0
177 ; GCN-NEXT: s_setpc_b64 s[30:31]
178 %mul = fmul float %x, -8.0
; f32 multiply by -4.0. Every configuration is expected to emit v_mul_f32 with
; -4.0 printed directly as the constant operand (no hex literal).
182 define float @v_mul_neg4_f32(float %x) {
183 ; GCN-LABEL: v_mul_neg4_f32:
185 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GCN-NEXT: v_mul_f32_e32 v0, -4.0, v0
187 ; GCN-NEXT: s_setpc_b64 s[30:31]
188 %mul = fmul float %x, -4.0
; f32 multiply by -2.0. Every configuration is expected to emit v_mul_f32 with
; -2.0 printed directly as the constant operand (no hex literal).
192 define float @v_mul_neg2_f32(float %x) {
193 ; GCN-LABEL: v_mul_neg2_f32:
195 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GCN-NEXT: v_mul_f32_e32 v0, -2.0, v0
197 ; GCN-NEXT: s_setpc_b64 s[30:31]
198 %mul = fmul float %x, -2.0
; f32 multiply by -1.0. The SelectionDAG configurations are expected to flip the
; sign bit with v_xor_b32 0x80000000, while the GlobalISel configurations are
; expected to keep v_mul_f32 by -1.0.
202 define float @v_mul_neg1_f32(float %x) {
203 ; GFX9-SDAG-LABEL: v_mul_neg1_f32:
204 ; GFX9-SDAG: ; %bb.0:
205 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
207 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
209 ; GFX9-GISEL-LABEL: v_mul_neg1_f32:
210 ; GFX9-GISEL: ; %bb.0:
211 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
213 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
215 ; GFX10-SDAG-LABEL: v_mul_neg1_f32:
216 ; GFX10-SDAG: ; %bb.0:
217 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
219 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
221 ; GFX10-GISEL-LABEL: v_mul_neg1_f32:
222 ; GFX10-GISEL: ; %bb.0:
223 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
225 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
227 ; GFX11-SDAG-LABEL: v_mul_neg1_f32:
228 ; GFX11-SDAG: ; %bb.0:
229 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
231 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
233 ; GFX11-GISEL-LABEL: v_mul_neg1_f32:
234 ; GFX11-GISEL: ; %bb.0:
235 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
237 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
238 %mul = fmul float %x, -1.0
; f32 multiply by -0.5. Every configuration is expected to emit v_mul_f32 with
; -0.5 printed directly as the constant operand.
242 define float @v_mul_neg_half_f32(float %x) {
243 ; GCN-LABEL: v_mul_neg_half_f32:
245 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GCN-NEXT: v_mul_f32_e32 v0, -0.5, v0
247 ; GCN-NEXT: s_setpc_b64 s[30:31]
248 %mul = fmul float %x, -0.5
; f32 multiply by -0.25. Every configuration is expected to emit v_mul_f32
; with the literal 0xbe800000.
252 define float @v_mul_neg_quarter_f32(float %x) {
253 ; GCN-LABEL: v_mul_neg_quarter_f32:
255 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GCN-NEXT: v_mul_f32_e32 v0, 0xbe800000, v0
257 ; GCN-NEXT: s_setpc_b64 s[30:31]
258 %mul = fmul float %x, -0.25
; f32 multiply by 0.25. Every configuration is expected to emit v_mul_f32
; with the literal 0x3e800000.
262 define float @v_mul_quarter_f32(float %x) {
263 ; GCN-LABEL: v_mul_quarter_f32:
265 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3e800000, v0
267 ; GCN-NEXT: s_setpc_b64 s[30:31]
268 %mul = fmul float %x, 0.25
; f32 multiply by 0.5. Every configuration is expected to emit v_mul_f32 with
; 0.5 printed directly as the constant operand.
272 define float @v_mul_half_f32(float %x) {
273 ; GCN-LABEL: v_mul_half_f32:
275 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; GCN-NEXT: v_mul_f32_e32 v0, 0.5, v0
277 ; GCN-NEXT: s_setpc_b64 s[30:31]
278 %mul = fmul float %x, 0.5
; f32 multiply by 1.0. The multiply is expected to disappear entirely: the
; checks contain only the entry wait and the return.
282 define float @v_mul_1_f32(float %x) {
283 ; GCN-LABEL: v_mul_1_f32:
285 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; GCN-NEXT: s_setpc_b64 s[30:31]
287 %mul = fmul float %x, 1.0
; f32 multiply by 2.0. The SelectionDAG configurations are expected to emit
; v_add_f32 v0, v0, v0, while the GlobalISel configurations are expected to keep
; v_mul_f32 by 2.0.
291 define float @v_mul_2_f32(float %x) {
292 ; GFX9-SDAG-LABEL: v_mul_2_f32:
293 ; GFX9-SDAG: ; %bb.0:
294 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v0, v0
296 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
298 ; GFX9-GISEL-LABEL: v_mul_2_f32:
299 ; GFX9-GISEL: ; %bb.0:
300 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, 2.0, v0
302 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
304 ; GFX10-SDAG-LABEL: v_mul_2_f32:
305 ; GFX10-SDAG: ; %bb.0:
306 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v0, v0
308 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
310 ; GFX10-GISEL-LABEL: v_mul_2_f32:
311 ; GFX10-GISEL: ; %bb.0:
312 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313 ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, 2.0, v0
314 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
316 ; GFX11-SDAG-LABEL: v_mul_2_f32:
317 ; GFX11-SDAG: ; %bb.0:
318 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; GFX11-SDAG-NEXT: v_add_f32_e32 v0, v0, v0
320 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
322 ; GFX11-GISEL-LABEL: v_mul_2_f32:
323 ; GFX11-GISEL: ; %bb.0:
324 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325 ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, 2.0, v0
326 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
327 %mul = fmul float %x, 2.0
; f32 multiply by 4.0. Every configuration is expected to emit v_mul_f32 with
; 4.0 printed directly as the constant operand.
331 define float @v_mul_4_f32(float %x) {
332 ; GCN-LABEL: v_mul_4_f32:
334 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GCN-NEXT: v_mul_f32_e32 v0, 4.0, v0
336 ; GCN-NEXT: s_setpc_b64 s[30:31]
337 %mul = fmul float %x, 4.0
; f32 multiply by 8.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x41000000.
341 define float @v_mul_8_f32(float %x) {
342 ; GCN-LABEL: v_mul_8_f32:
344 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GCN-NEXT: v_mul_f32_e32 v0, 0x41000000, v0
346 ; GCN-NEXT: s_setpc_b64 s[30:31]
347 %mul = fmul float %x, 8.0
; f32 multiply by 16.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x41800000.
351 define float @v_mul_16_f32(float %x) {
352 ; GCN-LABEL: v_mul_16_f32:
354 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GCN-NEXT: v_mul_f32_e32 v0, 0x41800000, v0
356 ; GCN-NEXT: s_setpc_b64 s[30:31]
357 %mul = fmul float %x, 16.0
; f32 multiply by 32.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x42000000.
361 define float @v_mul_32_f32(float %x) {
362 ; GCN-LABEL: v_mul_32_f32:
364 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365 ; GCN-NEXT: v_mul_f32_e32 v0, 0x42000000, v0
366 ; GCN-NEXT: s_setpc_b64 s[30:31]
367 %mul = fmul float %x, 32.0
; f32 multiply by 64.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x42800000.
371 define float @v_mul_64_f32(float %x) {
372 ; GCN-LABEL: v_mul_64_f32:
374 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375 ; GCN-NEXT: v_mul_f32_e32 v0, 0x42800000, v0
376 ; GCN-NEXT: s_setpc_b64 s[30:31]
377 %mul = fmul float %x, 64.0
; f32 multiply by 128.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x43000000.
381 define float @v_mul_128_f32(float %x) {
382 ; GCN-LABEL: v_mul_128_f32:
384 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385 ; GCN-NEXT: v_mul_f32_e32 v0, 0x43000000, v0
386 ; GCN-NEXT: s_setpc_b64 s[30:31]
387 %mul = fmul float %x, 128.0
; f32 multiply by 256.0. Every configuration is expected to emit v_mul_f32
; with the literal 0x43800000.
391 define float @v_mul_256_f32(float %x) {
392 ; GCN-LABEL: v_mul_256_f32:
394 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GCN-NEXT: v_mul_f32_e32 v0, 0x43800000, v0
396 ; GCN-NEXT: s_setpc_b64 s[30:31]
397 %mul = fmul float %x, 256.0
; f32 multiply by 2^63 (9223372036854775808.0). Every configuration is expected
; to emit v_mul_f32 with the literal 0x5f000000.
402 define float @v_mul_0x1p63_f32(float %x) {
403 ; GCN-LABEL: v_mul_0x1p63_f32:
405 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f000000, v0
407 ; GCN-NEXT: s_setpc_b64 s[30:31]
408 %mul = fmul float %x, 9223372036854775808.0
; f32 multiply by 2^64 (18446744073709551616.0). Every configuration is expected
; to emit v_mul_f32 with the literal 0x5f800000.
413 define float @v_mul_0x1p64_f32(float %x) {
414 ; GCN-LABEL: v_mul_0x1p64_f32:
416 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f800000, v0
418 ; GCN-NEXT: s_setpc_b64 s[30:31]
419 %mul = fmul float %x, 18446744073709551616.0
; f32 multiply by 2^65 (36893488147419103232.0). Every configuration is expected
; to emit v_mul_f32 with the literal 0x60000000.
424 define float @v_mul_0x1p65_f32(float %x) {
425 ; GCN-LABEL: v_mul_0x1p65_f32:
427 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GCN-NEXT: v_mul_f32_e32 v0, 0x60000000, v0
429 ; GCN-NEXT: s_setpc_b64 s[30:31]
430 %mul = fmul float %x, 36893488147419103232.0
434 ; Check that this doesn't interfere with fma formation
; contract fmul by 32.0 feeding a contract fadd. The pair is expected to fuse
; into a single instruction: v_fma_f32 on gfx900 (constant in a register) and
; v_fmamk_f32 with the literal 0x42000000 on gfx1030/gfx1100.
435 define float @v_fma_mul_add_32_f32(float %x, float %y) {
436 ; GFX9-SDAG-LABEL: v_fma_mul_add_32_f32:
437 ; GFX9-SDAG: ; %bb.0:
438 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x42000000
440 ; GFX9-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
441 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
443 ; GFX9-GISEL-LABEL: v_fma_mul_add_32_f32:
444 ; GFX9-GISEL: ; %bb.0:
445 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
447 ; GFX9-GISEL-NEXT: v_fma_f32 v0, v0, v2, v1
448 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
450 ; GFX1011-LABEL: v_fma_mul_add_32_f32:
452 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX1011-NEXT: v_fmamk_f32 v0, v0, 0x42000000, v1
454 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
455 %mul = fmul contract float %x, 32.0
456 %fma = fadd contract float %mul, %y
; contract fmul by 32.0 feeding a contract fsub of %y. The pair is expected to
; fuse into v_fma_f32 with the addend negated (-v1) in every configuration.
460 define float @v_fma_mul_sub_32_f32(float %x, float %y) {
461 ; GFX9-SDAG-LABEL: v_fma_mul_sub_32_f32:
462 ; GFX9-SDAG: ; %bb.0:
463 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x42000000
465 ; GFX9-SDAG-NEXT: v_fma_f32 v0, v0, s4, -v1
466 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
468 ; GFX9-GISEL-LABEL: v_fma_mul_sub_32_f32:
469 ; GFX9-GISEL: ; %bb.0:
470 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
472 ; GFX9-GISEL-NEXT: v_fma_f32 v0, v0, v2, -v1
473 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
475 ; GFX1011-LABEL: v_fma_mul_sub_32_f32:
477 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478 ; GFX1011-NEXT: v_fma_f32 v0, 0x42000000, v0, -v1
479 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
480 %mul = fmul contract float %x, 32.0
481 %fma = fsub contract float %mul, %y
; contract fmul by -32.0 feeding a contract fadd. The pair is expected to fuse:
; v_fma_f32 on gfx900 (constant 0xc2000000 in a register) and v_fmamk_f32 with
; the literal 0xc2000000 on gfx1030/gfx1100.
485 define float @v_fma_mul_add_neg32_f32(float %x, float %y) {
486 ; GFX9-SDAG-LABEL: v_fma_mul_add_neg32_f32:
487 ; GFX9-SDAG: ; %bb.0:
488 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xc2000000
490 ; GFX9-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
491 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
493 ; GFX9-GISEL-LABEL: v_fma_mul_add_neg32_f32:
494 ; GFX9-GISEL: ; %bb.0:
495 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2000000
497 ; GFX9-GISEL-NEXT: v_fma_f32 v0, v0, v2, v1
498 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
500 ; GFX1011-LABEL: v_fma_mul_add_neg32_f32:
502 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX1011-NEXT: v_fmamk_f32 v0, v0, 0xc2000000, v1
504 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
505 %mul = fmul contract float %x, -32.0
506 %fma = fadd contract float %mul, %y
; fabs(%x) multiplied by 32.0. The fabs is expected to appear as the |v0|
; operand modifier on a single v_mul_f32_e64 in every configuration.
510 define float @v_mul_fabs_32_f32(float %x) {
511 ; GFX9-SDAG-LABEL: v_mul_fabs_32_f32:
512 ; GFX9-SDAG: ; %bb.0:
513 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x42000000
515 ; GFX9-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
516 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
518 ; GFX9-GISEL-LABEL: v_mul_fabs_32_f32:
519 ; GFX9-GISEL: ; %bb.0:
520 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
522 ; GFX9-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
523 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
525 ; GFX1011-LABEL: v_mul_fabs_32_f32:
527 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528 ; GFX1011-NEXT: v_mul_f32_e64 v0, 0x42000000, |v0|
529 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
530 %x.fabs = call float @llvm.fabs.f32(float %x)
531 %mul = fmul float %x.fabs, 32.0
; fabs(%x) * 32.0 + %y with contract on both the fmul and the fadd. Expected to
; fuse into one v_fma_f32 that carries the fabs as the |v0| operand modifier.
535 define float @v_mul_add_fma_fabs_32_f32(float %x, float %y) {
536 ; GFX9-SDAG-LABEL: v_mul_add_fma_fabs_32_f32:
537 ; GFX9-SDAG: ; %bb.0:
538 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x42000000
540 ; GFX9-SDAG-NEXT: v_fma_f32 v0, |v0|, s4, v1
541 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
543 ; GFX9-GISEL-LABEL: v_mul_add_fma_fabs_32_f32:
544 ; GFX9-GISEL: ; %bb.0:
545 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
547 ; GFX9-GISEL-NEXT: v_fma_f32 v0, |v0|, v2, v1
548 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
550 ; GFX1011-LABEL: v_mul_add_fma_fabs_32_f32:
552 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX1011-NEXT: v_fma_f32 v0, 0x42000000, |v0|, v1
554 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
555 %x.fabs = call float @llvm.fabs.f32(float %x)
556 %mul = fmul contract float %x.fabs, 32.0
557 %fma = fadd contract float %mul, %y
; <2 x float> multiply by a splat of 16.0. gfx900 and gfx1030 are expected to
; emit one v_mul_f32 per element; gfx1100 is expected to pair them into a
; single v_dual_mul_f32.
561 define <2 x float> @v_mul_16_v2f32(<2 x float> %x) {
562 ; GFX9-LABEL: v_mul_16_v2f32:
564 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x41800000, v0
566 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x41800000, v1
567 ; GFX9-NEXT: s_setpc_b64 s[30:31]
569 ; GFX10-LABEL: v_mul_16_v2f32:
571 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x41800000, v0
573 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x41800000, v1
574 ; GFX10-NEXT: s_setpc_b64 s[30:31]
576 ; GFX11-LABEL: v_mul_16_v2f32:
578 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x41800000, v0 :: v_dual_mul_f32 v1, 0x41800000, v1
580 ; GFX11-NEXT: s_setpc_b64 s[30:31]
581 %mul = fmul <2 x float> %x, <float 16.0, float 16.0>
; <2 x float> multiply by a splat of -16.0. gfx900 and gfx1030 are expected to
; emit one v_mul_f32 per element; gfx1100 is expected to pair them into a
; single v_dual_mul_f32.
585 define <2 x float> @v_mul_neg16_v2f32(<2 x float> %x) {
586 ; GFX9-LABEL: v_mul_neg16_v2f32:
588 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589 ; GFX9-NEXT: v_mul_f32_e32 v0, 0xc1800000, v0
590 ; GFX9-NEXT: v_mul_f32_e32 v1, 0xc1800000, v1
591 ; GFX9-NEXT: s_setpc_b64 s[30:31]
593 ; GFX10-LABEL: v_mul_neg16_v2f32:
595 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596 ; GFX10-NEXT: v_mul_f32_e32 v0, 0xc1800000, v0
597 ; GFX10-NEXT: v_mul_f32_e32 v1, 0xc1800000, v1
598 ; GFX10-NEXT: s_setpc_b64 s[30:31]
600 ; GFX11-LABEL: v_mul_neg16_v2f32:
602 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603 ; GFX11-NEXT: v_dual_mul_f32 v0, 0xc1800000, v0 :: v_dual_mul_f32 v1, 0xc1800000, v1
604 ; GFX11-NEXT: s_setpc_b64 s[30:31]
605 %mul = fmul <2 x float> %x, <float -16.0, float -16.0>
; fabs of a <2 x float> multiplied by a splat of 16.0. Each element is expected
; to become a v_mul_f32_e64 with the fabs carried as a |vN| operand modifier.
609 define <2 x float> @v_mul_fabs_16_v2f32(<2 x float> %x) {
610 ; GFX9-LABEL: v_mul_fabs_16_v2f32:
612 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613 ; GFX9-NEXT: s_mov_b32 s4, 0x41800000
614 ; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, s4
615 ; GFX9-NEXT: v_mul_f32_e64 v1, |v1|, s4
616 ; GFX9-NEXT: s_setpc_b64 s[30:31]
618 ; GFX1011-LABEL: v_mul_fabs_16_v2f32:
620 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX1011-NEXT: v_mul_f32_e64 v0, 0x41800000, |v0|
622 ; GFX1011-NEXT: v_mul_f32_e64 v1, 0x41800000, |v1|
623 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
624 %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
625 %mul = fmul <2 x float> %x.fabs, <float 16.0, float 16.0>
; <2 x float> contract fmul by a splat of 32.0 feeding a contract fadd. Each
; element is expected to fuse: v_fma_f32 on gfx900, v_fmamk_f32 on gfx1030, and
; a paired v_dual_fmamk_f32 on gfx1100.
629 define <2 x float> @v_fma_mul_add_32_v2f32(<2 x float> %x, <2 x float> %y) {
630 ; GFX9-LABEL: v_fma_mul_add_32_v2f32:
632 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; GFX9-NEXT: s_mov_b32 s4, 0x42000000
634 ; GFX9-NEXT: v_fma_f32 v0, v0, s4, v2
635 ; GFX9-NEXT: v_fma_f32 v1, v1, s4, v3
636 ; GFX9-NEXT: s_setpc_b64 s[30:31]
638 ; GFX10-LABEL: v_fma_mul_add_32_v2f32:
640 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641 ; GFX10-NEXT: v_fmamk_f32 v0, v0, 0x42000000, v2
642 ; GFX10-NEXT: v_fmamk_f32 v1, v1, 0x42000000, v3
643 ; GFX10-NEXT: s_setpc_b64 s[30:31]
645 ; GFX11-LABEL: v_fma_mul_add_32_v2f32:
647 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GFX11-NEXT: v_dual_fmamk_f32 v0, v0, 0x42000000, v2 :: v_dual_fmamk_f32 v1, v1, 0x42000000, v3
649 ; GFX11-NEXT: s_setpc_b64 s[30:31]
650 %mul = fmul contract <2 x float> %x, <float 32.0, float 32.0>
651 %fma = fadd contract <2 x float> %mul, %y
; amdgpu_ps variant with an inreg operand: multiply by -64.0, bitcast to i32 and
; return it through readfirstlane. The checks expect a v_mul_f32 reading s0
; followed by v_readfirstlane_b32.
655 define amdgpu_ps i32 @s_mul_neg64_f32(float inreg %x) {
656 ; GFX9-LABEL: s_mul_neg64_f32:
658 ; GFX9-NEXT: v_mov_b32_e32 v0, 0xc2800000
659 ; GFX9-NEXT: v_mul_f32_e32 v0, s0, v0
660 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
661 ; GFX9-NEXT: ; return to shader part epilog
663 ; GFX1011-LABEL: s_mul_neg64_f32:
665 ; GFX1011-NEXT: v_mul_f32_e64 v0, 0xc2800000, s0
666 ; GFX1011-NEXT: v_readfirstlane_b32 s0, v0
667 ; GFX1011-NEXT: ; return to shader part epilog
668 %mul = fmul float %x, -64.0
669 %cast = bitcast float %mul to i32
670 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
671 ret i32 %readfirstlane
; amdgpu_ps variant with an inreg operand: multiply by 32.0, bitcast to i32 and
; return it through readfirstlane. The checks expect a v_mul_f32 reading s0
; followed by v_readfirstlane_b32.
674 define amdgpu_ps i32 @s_mul_32_f32(float inreg %x) {
675 ; GFX9-LABEL: s_mul_32_f32:
677 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x42000000
678 ; GFX9-NEXT: v_mul_f32_e32 v0, s0, v0
679 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
680 ; GFX9-NEXT: ; return to shader part epilog
682 ; GFX1011-LABEL: s_mul_32_f32:
684 ; GFX1011-NEXT: v_mul_f32_e64 v0, 0x42000000, s0
685 ; GFX1011-NEXT: v_readfirstlane_b32 s0, v0
686 ; GFX1011-NEXT: ; return to shader part epilog
687 %mul = fmul float %x, 32.0
688 %cast = bitcast float %mul to i32
689 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
690 ret i32 %readfirstlane
; amdgpu_ps variant with two inreg operands: contract %x * 32.0 + %y, returned
; via bitcast and readfirstlane. Expected to fuse into v_fma_f32 on gfx900 and
; v_fmac_f32 on gfx1030/gfx1100, with %y first copied from s1 into a VGPR.
693 define amdgpu_ps i32 @s_mul_fma_32_f32(float inreg %x, float inreg %y) {
694 ; GFX9-LABEL: s_mul_fma_32_f32:
696 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x42000000
697 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
698 ; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1
699 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
700 ; GFX9-NEXT: ; return to shader part epilog
702 ; GFX1011-LABEL: s_mul_fma_32_f32:
704 ; GFX1011-NEXT: v_mov_b32_e32 v0, s1
705 ; GFX1011-NEXT: v_fmac_f32_e64 v0, 0x42000000, s0
706 ; GFX1011-NEXT: v_readfirstlane_b32 s0, v0
707 ; GFX1011-NEXT: ; return to shader part epilog
708 %mul = fmul contract float %x, 32.0
709 %fma = fadd contract float %mul, %y
710 %cast = bitcast float %fma to i32
711 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
712 ret i32 %readfirstlane
; f64 multiply by 2^-1031 (4.34584737989687770135e-311). The SelectionDAG
; configurations are expected to emit v_ldexp_f64 with exponent -1031
; (0xfffffbf9 as a literal on gfx1030/gfx1100; s_movk_i32 0xfbf9 on gfx900,
; presumably sign-extended). The GlobalISel configurations are expected to keep
; v_mul_f64 with the constant's high word 0x800.
716 define double @v_mul_0x1pn1031_f64(double %x) {
717 ; GFX9-SDAG-LABEL: v_mul_0x1pn1031_f64:
718 ; GFX9-SDAG: ; %bb.0:
719 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfbf9
721 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
722 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
724 ; GFX9-GISEL-LABEL: v_mul_0x1pn1031_f64:
725 ; GFX9-GISEL: ; %bb.0:
726 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
728 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x800
729 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
730 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
732 ; GFX10-SDAG-LABEL: v_mul_0x1pn1031_f64:
733 ; GFX10-SDAG: ; %bb.0:
734 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffbf9
736 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
738 ; GFX10-GISEL-LABEL: v_mul_0x1pn1031_f64:
739 ; GFX10-GISEL: ; %bb.0:
740 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
741 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, v[0:1]
742 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
744 ; GFX11-SDAG-LABEL: v_mul_0x1pn1031_f64:
745 ; GFX11-SDAG: ; %bb.0:
746 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffbf9
748 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
750 ; GFX11-GISEL-LABEL: v_mul_0x1pn1031_f64:
751 ; GFX11-GISEL: ; %bb.0:
752 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, v[0:1]
754 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
755 %mul = fmul double %x, 4.34584737989687770135e-311
; f64 multiply by 2^-1022 (2.22507385850720138309e-308). The SelectionDAG
; configurations are expected to emit v_ldexp_f64 with exponent -1022
; (0xfffffc02 as a literal on gfx1030/gfx1100; s_movk_i32 0xfc02 on gfx900,
; presumably sign-extended). The GlobalISel configurations are expected to keep
; v_mul_f64 with the constant's high word 0x100000.
760 define double @v_mul_0x1pn1022_f64(double %x) {
761 ; GFX9-SDAG-LABEL: v_mul_0x1pn1022_f64:
762 ; GFX9-SDAG: ; %bb.0:
763 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfc02
765 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
766 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
768 ; GFX9-GISEL-LABEL: v_mul_0x1pn1022_f64:
769 ; GFX9-GISEL: ; %bb.0:
770 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
772 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x100000
773 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
774 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
776 ; GFX10-SDAG-LABEL: v_mul_0x1pn1022_f64:
777 ; GFX10-SDAG: ; %bb.0:
778 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc02
780 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
782 ; GFX10-GISEL-LABEL: v_mul_0x1pn1022_f64:
783 ; GFX10-GISEL: ; %bb.0:
784 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x100000, v[0:1]
786 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
788 ; GFX11-SDAG-LABEL: v_mul_0x1pn1022_f64:
789 ; GFX11-SDAG: ; %bb.0:
790 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
791 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc02
792 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
794 ; GFX11-GISEL-LABEL: v_mul_0x1pn1022_f64:
795 ; GFX11-GISEL: ; %bb.0:
796 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x100000, v[0:1]
798 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
799 %mul = fmul double %x, 2.22507385850720138309e-308
; f64 multiply by 2^-1021 (4.45014771701440276618e-308). The SelectionDAG
; configurations are expected to emit v_ldexp_f64 with exponent -1021
; (0xfffffc03 as a literal on gfx1030/gfx1100; s_movk_i32 0xfc03 on gfx900,
; presumably sign-extended). The GlobalISel configurations are expected to keep
; v_mul_f64 with the constant's high word 0x200000.
804 define double @v_mul_0x1pn1021_f64(double %x) {
805 ; GFX9-SDAG-LABEL: v_mul_0x1pn1021_f64:
806 ; GFX9-SDAG: ; %bb.0:
807 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfc03
809 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
810 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
812 ; GFX9-GISEL-LABEL: v_mul_0x1pn1021_f64:
813 ; GFX9-GISEL: ; %bb.0:
814 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
816 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x200000
817 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
818 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
820 ; GFX10-SDAG-LABEL: v_mul_0x1pn1021_f64:
821 ; GFX10-SDAG: ; %bb.0:
822 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc03
824 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
826 ; GFX10-GISEL-LABEL: v_mul_0x1pn1021_f64:
827 ; GFX10-GISEL: ; %bb.0:
828 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x200000, v[0:1]
830 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
832 ; GFX11-SDAG-LABEL: v_mul_0x1pn1021_f64:
833 ; GFX11-SDAG: ; %bb.0:
834 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc03
836 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
838 ; GFX11-GISEL-LABEL: v_mul_0x1pn1021_f64:
839 ; GFX11-GISEL: ; %bb.0:
840 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x200000, v[0:1]
842 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
843 %mul = fmul double %x, 4.45014771701440276618e-308
; f64 multiply by 2^-64 (5.42101086242752217004e-20). The SelectionDAG
; configurations are expected to emit v_ldexp_f64 with exponent -64
; (0xffffffc0 as a literal on gfx1030/gfx1100; s_movk_i32 0xffc0 on gfx900,
; presumably sign-extended). The GlobalISel configurations are expected to keep
; v_mul_f64 with the constant's high word 0x3bf00000.
848 define double @v_mul_0x1pn64_f64(double %x) {
849 ; GFX9-SDAG-LABEL: v_mul_0x1pn64_f64:
850 ; GFX9-SDAG: ; %bb.0:
851 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
852 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffc0
853 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
854 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
856 ; GFX9-GISEL-LABEL: v_mul_0x1pn64_f64:
857 ; GFX9-GISEL: ; %bb.0:
858 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
860 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3bf00000
861 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
862 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
864 ; GFX10-SDAG-LABEL: v_mul_0x1pn64_f64:
865 ; GFX10-SDAG: ; %bb.0:
866 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffc0
868 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
870 ; GFX10-GISEL-LABEL: v_mul_0x1pn64_f64:
871 ; GFX10-GISEL: ; %bb.0:
872 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3bf00000, v[0:1]
874 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
876 ; GFX11-SDAG-LABEL: v_mul_0x1pn64_f64:
877 ; GFX11-SDAG: ; %bb.0:
878 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffc0
880 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
882 ; GFX11-GISEL-LABEL: v_mul_0x1pn64_f64:
883 ; GFX11-GISEL: ; %bb.0:
884 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3bf00000, v[0:1]
886 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
887 %mul = fmul double %x, 5.42101086242752217004e-20
; f64 multiply by 2^-17 (0.00000762939453125). The SelectionDAG configurations
; are expected to emit v_ldexp_f64 with exponent -17 (0xffffffef as a literal on
; gfx1030/gfx1100; s_movk_i32 0xffef on gfx900, presumably sign-extended). The
; GlobalISel configurations are expected to keep v_mul_f64 with the constant's
; high word 0x3ee00000.
892 define double @v_mul_0x1pn17_f64(double %x) {
893 ; GFX9-SDAG-LABEL: v_mul_0x1pn17_f64:
894 ; GFX9-SDAG: ; %bb.0:
895 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffef
897 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
898 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
900 ; GFX9-GISEL-LABEL: v_mul_0x1pn17_f64:
901 ; GFX9-GISEL: ; %bb.0:
902 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
904 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3ee00000
905 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
906 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
908 ; GFX10-SDAG-LABEL: v_mul_0x1pn17_f64:
909 ; GFX10-SDAG: ; %bb.0:
910 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffef
912 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
914 ; GFX10-GISEL-LABEL: v_mul_0x1pn17_f64:
915 ; GFX10-GISEL: ; %bb.0:
916 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ee00000, v[0:1]
918 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
920 ; GFX11-SDAG-LABEL: v_mul_0x1pn17_f64:
921 ; GFX11-SDAG: ; %bb.0:
922 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffef
924 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
926 ; GFX11-GISEL-LABEL: v_mul_0x1pn17_f64:
927 ; GFX11-GISEL: ; %bb.0:
928 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ee00000, v[0:1]
930 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
931 %mul = fmul double %x, 0.00000762939453125
; f64 multiply by 2^-16 (0.0000152587890625). The SelectionDAG configurations
; are expected to emit v_ldexp_f64 with -16 printed directly as the exponent
; operand, with no separate constant materialization even on gfx900. The
; GlobalISel configurations are expected to keep v_mul_f64 with the constant's
; high word 0x3ef00000.
936 define double @v_mul_0x1pn16_f64(double %x) {
937 ; GFX9-SDAG-LABEL: v_mul_0x1pn16_f64:
938 ; GFX9-SDAG: ; %bb.0:
939 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16
941 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
943 ; GFX9-GISEL-LABEL: v_mul_0x1pn16_f64:
944 ; GFX9-GISEL: ; %bb.0:
945 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
946 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
947 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3ef00000
948 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
949 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
951 ; GFX10-SDAG-LABEL: v_mul_0x1pn16_f64:
952 ; GFX10-SDAG: ; %bb.0:
953 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16
955 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
957 ; GFX10-GISEL-LABEL: v_mul_0x1pn16_f64:
958 ; GFX10-GISEL: ; %bb.0:
959 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ef00000, v[0:1]
961 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
963 ; GFX11-SDAG-LABEL: v_mul_0x1pn16_f64:
964 ; GFX11-SDAG: ; %bb.0:
965 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16
967 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
969 ; GFX11-GISEL-LABEL: v_mul_0x1pn16_f64:
970 ; GFX11-GISEL: ; %bb.0:
971 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ef00000, v[0:1]
973 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
974 %mul = fmul double %x, 0.0000152587890625
; Multiply a double by 2^-15 (0.000030517578125). The SelectionDAG checks
; expect v_ldexp_f64 with exponent -15; the GlobalISel checks still expect
; v_mul_f64 by the constant, whose upper 32 bits are 0x3f000000. On the gfx900
; GlobalISel run that upper half is printed as 0.5, which is the same 32-bit
; pattern interpreted as a float.
979 define double @v_mul_0x1pn15_f64(double %x) {
980 ; GFX9-SDAG-LABEL: v_mul_0x1pn15_f64:
981 ; GFX9-SDAG: ; %bb.0:
982 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15
984 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
986 ; GFX9-GISEL-LABEL: v_mul_0x1pn15_f64:
987 ; GFX9-GISEL: ; %bb.0:
988 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
990 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0.5
991 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
992 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
994 ; GFX10-SDAG-LABEL: v_mul_0x1pn15_f64:
995 ; GFX10-SDAG: ; %bb.0:
996 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15
998 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1000 ; GFX10-GISEL-LABEL: v_mul_0x1pn15_f64:
1001 ; GFX10-GISEL: ; %bb.0:
1002 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3f000000, v[0:1]
1004 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1006 ; GFX11-SDAG-LABEL: v_mul_0x1pn15_f64:
1007 ; GFX11-SDAG: ; %bb.0:
1008 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15
1010 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1012 ; GFX11-GISEL-LABEL: v_mul_0x1pn15_f64:
1013 ; GFX11-GISEL: ; %bb.0:
1014 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3f000000, v[0:1]
1016 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1017 %mul = fmul double %x, 0.000030517578125
; Multiply a double by -256.0, i.e. -(2^8). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 8 and the sign applied as a negate modifier on the
; source (-v[0:1]); the GlobalISel checks still expect v_mul_f64 by the
; constant, whose upper 32 bits are 0xc0700000.
1021 define double @v_mul_neg256_f64(double %x) {
1022 ; GFX9-SDAG-LABEL: v_mul_neg256_f64:
1023 ; GFX9-SDAG: ; %bb.0:
1024 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
1026 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1028 ; GFX9-GISEL-LABEL: v_mul_neg256_f64:
1029 ; GFX9-GISEL: ; %bb.0:
1030 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1032 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0700000
1033 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1034 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1036 ; GFX10-SDAG-LABEL: v_mul_neg256_f64:
1037 ; GFX10-SDAG: ; %bb.0:
1038 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
1040 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1042 ; GFX10-GISEL-LABEL: v_mul_neg256_f64:
1043 ; GFX10-GISEL: ; %bb.0:
1044 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, v[0:1]
1046 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1048 ; GFX11-SDAG-LABEL: v_mul_neg256_f64:
1049 ; GFX11-SDAG: ; %bb.0:
1050 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
1052 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1054 ; GFX11-GISEL-LABEL: v_mul_neg256_f64:
1055 ; GFX11-GISEL: ; %bb.0:
1056 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, v[0:1]
1058 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1059 %mul = fmul double %x, -256.0
; Multiply a double by -128.0, i.e. -(2^7). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 7 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xc0600000.
1063 define double @v_mul_neg128_f64(double %x) {
1064 ; GFX9-SDAG-LABEL: v_mul_neg128_f64:
1065 ; GFX9-SDAG: ; %bb.0:
1066 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
1068 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1070 ; GFX9-GISEL-LABEL: v_mul_neg128_f64:
1071 ; GFX9-GISEL: ; %bb.0:
1072 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1074 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0600000
1075 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1076 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1078 ; GFX10-SDAG-LABEL: v_mul_neg128_f64:
1079 ; GFX10-SDAG: ; %bb.0:
1080 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
1082 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1084 ; GFX10-GISEL-LABEL: v_mul_neg128_f64:
1085 ; GFX10-GISEL: ; %bb.0:
1086 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0600000, v[0:1]
1088 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1090 ; GFX11-SDAG-LABEL: v_mul_neg128_f64:
1091 ; GFX11-SDAG: ; %bb.0:
1092 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
1094 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1096 ; GFX11-GISEL-LABEL: v_mul_neg128_f64:
1097 ; GFX11-GISEL: ; %bb.0:
1098 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0600000, v[0:1]
1100 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1101 %mul = fmul double %x, -128.0
; Multiply a double by -64.0, i.e. -(2^6). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 6 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xc0500000.
1105 define double @v_mul_neg64_f64(double %x) {
1106 ; GFX9-SDAG-LABEL: v_mul_neg64_f64:
1107 ; GFX9-SDAG: ; %bb.0:
1108 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
1110 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1112 ; GFX9-GISEL-LABEL: v_mul_neg64_f64:
1113 ; GFX9-GISEL: ; %bb.0:
1114 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1115 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1116 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0500000
1117 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1118 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1120 ; GFX10-SDAG-LABEL: v_mul_neg64_f64:
1121 ; GFX10-SDAG: ; %bb.0:
1122 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
1124 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1126 ; GFX10-GISEL-LABEL: v_mul_neg64_f64:
1127 ; GFX10-GISEL: ; %bb.0:
1128 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0500000, v[0:1]
1130 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1132 ; GFX11-SDAG-LABEL: v_mul_neg64_f64:
1133 ; GFX11-SDAG: ; %bb.0:
1134 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
1136 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1138 ; GFX11-GISEL-LABEL: v_mul_neg64_f64:
1139 ; GFX11-GISEL: ; %bb.0:
1140 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1141 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0500000, v[0:1]
1142 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1143 %mul = fmul double %x, -64.0
; Multiply a double by -32.0, i.e. -(2^5). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 5 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xc0400000.
1147 define double @v_mul_neg32_f64(double %x) {
1148 ; GFX9-SDAG-LABEL: v_mul_neg32_f64:
1149 ; GFX9-SDAG: ; %bb.0:
1150 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
1152 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1154 ; GFX9-GISEL-LABEL: v_mul_neg32_f64:
1155 ; GFX9-GISEL: ; %bb.0:
1156 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1158 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000
1159 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1160 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1162 ; GFX10-SDAG-LABEL: v_mul_neg32_f64:
1163 ; GFX10-SDAG: ; %bb.0:
1164 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
1166 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1168 ; GFX10-GISEL-LABEL: v_mul_neg32_f64:
1169 ; GFX10-GISEL: ; %bb.0:
1170 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1171 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0400000, v[0:1]
1172 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1174 ; GFX11-SDAG-LABEL: v_mul_neg32_f64:
1175 ; GFX11-SDAG: ; %bb.0:
1176 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
1178 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1180 ; GFX11-GISEL-LABEL: v_mul_neg32_f64:
1181 ; GFX11-GISEL: ; %bb.0:
1182 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0400000, v[0:1]
1184 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1185 %mul = fmul double %x, -32.0
; Multiply a double by -16.0, i.e. -(2^4). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 4 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xc0300000.
1189 define double @v_mul_neg16_f64(double %x) {
1190 ; GFX9-SDAG-LABEL: v_mul_neg16_f64:
1191 ; GFX9-SDAG: ; %bb.0:
1192 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1193 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
1194 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1196 ; GFX9-GISEL-LABEL: v_mul_neg16_f64:
1197 ; GFX9-GISEL: ; %bb.0:
1198 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1200 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0300000
1201 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1202 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1204 ; GFX10-SDAG-LABEL: v_mul_neg16_f64:
1205 ; GFX10-SDAG: ; %bb.0:
1206 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1207 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
1208 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1210 ; GFX10-GISEL-LABEL: v_mul_neg16_f64:
1211 ; GFX10-GISEL: ; %bb.0:
1212 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1213 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1]
1214 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1216 ; GFX11-SDAG-LABEL: v_mul_neg16_f64:
1217 ; GFX11-SDAG: ; %bb.0:
1218 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
1220 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX11-GISEL-LABEL: v_mul_neg16_f64:
1223 ; GFX11-GISEL: ; %bb.0:
1224 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1]
1226 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1227 %mul = fmul double %x, -16.0
; Multiply a double by -8.0, i.e. -(2^3). The SelectionDAG checks expect
; v_ldexp_f64 with exponent 3 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xc0200000.
1231 define double @v_mul_neg8_f64(double %x) {
1232 ; GFX9-SDAG-LABEL: v_mul_neg8_f64:
1233 ; GFX9-SDAG: ; %bb.0:
1234 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
1236 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1238 ; GFX9-GISEL-LABEL: v_mul_neg8_f64:
1239 ; GFX9-GISEL: ; %bb.0:
1240 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1242 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0200000
1243 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1244 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1246 ; GFX10-SDAG-LABEL: v_mul_neg8_f64:
1247 ; GFX10-SDAG: ; %bb.0:
1248 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
1250 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX10-GISEL-LABEL: v_mul_neg8_f64:
1253 ; GFX10-GISEL: ; %bb.0:
1254 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, v[0:1]
1256 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1258 ; GFX11-SDAG-LABEL: v_mul_neg8_f64:
1259 ; GFX11-SDAG: ; %bb.0:
1260 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
1262 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1264 ; GFX11-GISEL-LABEL: v_mul_neg8_f64:
1265 ; GFX11-GISEL: ; %bb.0:
1266 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, v[0:1]
1268 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1269 %mul = fmul double %x, -8.0
; Multiply a double by -4.0. One shared set of checks covers every run line and
; expects a plain v_mul_f64 with -4.0 written directly as the operand, so no
; ldexp is expected here.
; NOTE(review): presumably kept as a multiply because -4.0 is encodable as an
; inline operand - confirm against the backend.
1273 define double @v_mul_neg4_f64(double %x) {
1274 ; GCN-LABEL: v_mul_neg4_f64:
1276 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], -4.0
1278 ; GCN-NEXT: s_setpc_b64 s[30:31]
1279 %mul = fmul double %x, -4.0
; Multiply a double by -2.0. One shared set of checks covers every run line and
; expects a plain v_mul_f64 with -2.0 written directly as the operand, so no
; ldexp is expected here.
1283 define double @v_mul_neg2_f64(double %x) {
1284 ; GCN-LABEL: v_mul_neg2_f64:
1286 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], -2.0
1288 ; GCN-NEXT: s_setpc_b64 s[30:31]
1289 %mul = fmul double %x, -2.0
; Multiply a double by 0.0. The shared checks expect the multiply to remain in
; the output as v_mul_f64 with a 0 operand for every run line; it is not
; replaced by a constant result.
1293 define double @v_mul_0_f64(double %x) {
1294 ; GCN-LABEL: v_mul_0_f64:
1296 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], 0
1298 ; GCN-NEXT: s_setpc_b64 s[30:31]
1299 %mul = fmul double %x, 0.0
; Multiply a double by -1.0. The SelectionDAG checks expect a single v_xor_b32
; of v1 with 0x80000000 (toggling bit 31 of the second register of the v[0:1]
; pair) instead of a multiply; the GlobalISel checks expect v_mul_f64 with -1.0
; as the operand.
1303 define double @v_mul_neg1_f64(double %x) {
1304 ; GFX9-SDAG-LABEL: v_mul_neg1_f64:
1305 ; GFX9-SDAG: ; %bb.0:
1306 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX9-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1308 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX9-GISEL-LABEL: v_mul_neg1_f64:
1311 ; GFX9-GISEL: ; %bb.0:
1312 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
1314 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1316 ; GFX10-SDAG-LABEL: v_mul_neg1_f64:
1317 ; GFX10-SDAG: ; %bb.0:
1318 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319 ; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1320 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1322 ; GFX10-GISEL-LABEL: v_mul_neg1_f64:
1323 ; GFX10-GISEL: ; %bb.0:
1324 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
1326 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1328 ; GFX11-SDAG-LABEL: v_mul_neg1_f64:
1329 ; GFX11-SDAG: ; %bb.0:
1330 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331 ; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1332 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1334 ; GFX11-GISEL-LABEL: v_mul_neg1_f64:
1335 ; GFX11-GISEL: ; %bb.0:
1336 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1337 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
1338 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1339 %mul = fmul double %x, -1.0
; Multiply a double by -0.5. One shared set of checks covers every run line and
; expects a plain v_mul_f64 with -0.5 written directly as the operand, so no
; ldexp is expected here.
1343 define double @v_mul_neg_half_f64(double %x) {
1344 ; GCN-LABEL: v_mul_neg_half_f64:
1346 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], -0.5
1348 ; GCN-NEXT: s_setpc_b64 s[30:31]
1349 %mul = fmul double %x, -0.5
; Multiply a double by -0.25, i.e. -(2^-2). The SelectionDAG checks expect
; v_ldexp_f64 with exponent -2 and a negate modifier on the source (-v[0:1]);
; the GlobalISel checks still expect v_mul_f64 by the constant, whose upper
; 32 bits are 0xbfd00000.
1353 define double @v_mul_neg_quarter_f64(double %x) {
1354 ; GFX9-SDAG-LABEL: v_mul_neg_quarter_f64:
1355 ; GFX9-SDAG: ; %bb.0:
1356 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1357 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
1358 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1360 ; GFX9-GISEL-LABEL: v_mul_neg_quarter_f64:
1361 ; GFX9-GISEL: ; %bb.0:
1362 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1363 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1364 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbfd00000
1365 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1366 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1368 ; GFX10-SDAG-LABEL: v_mul_neg_quarter_f64:
1369 ; GFX10-SDAG: ; %bb.0:
1370 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
1372 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1374 ; GFX10-GISEL-LABEL: v_mul_neg_quarter_f64:
1375 ; GFX10-GISEL: ; %bb.0:
1376 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, v[0:1]
1378 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1380 ; GFX11-SDAG-LABEL: v_mul_neg_quarter_f64:
1381 ; GFX11-SDAG: ; %bb.0:
1382 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
1384 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1386 ; GFX11-GISEL-LABEL: v_mul_neg_quarter_f64:
1387 ; GFX11-GISEL: ; %bb.0:
1388 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, v[0:1]
1390 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1391 %mul = fmul double %x, -0.25
; Multiply a double by 0.25, i.e. 2^-2. The SelectionDAG checks expect
; v_ldexp_f64 with exponent -2; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x3fd00000.
1395 define double @v_mul_quarter_f64(double %x) {
1396 ; GFX9-SDAG-LABEL: v_mul_quarter_f64:
1397 ; GFX9-SDAG: ; %bb.0:
1398 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2
1400 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1402 ; GFX9-GISEL-LABEL: v_mul_quarter_f64:
1403 ; GFX9-GISEL: ; %bb.0:
1404 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1406 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fd00000
1407 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1408 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1410 ; GFX10-SDAG-LABEL: v_mul_quarter_f64:
1411 ; GFX10-SDAG: ; %bb.0:
1412 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2
1414 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1416 ; GFX10-GISEL-LABEL: v_mul_quarter_f64:
1417 ; GFX10-GISEL: ; %bb.0:
1418 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, v[0:1]
1420 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1422 ; GFX11-SDAG-LABEL: v_mul_quarter_f64:
1423 ; GFX11-SDAG: ; %bb.0:
1424 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2
1426 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1428 ; GFX11-GISEL-LABEL: v_mul_quarter_f64:
1429 ; GFX11-GISEL: ; %bb.0:
1430 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, v[0:1]
1432 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1433 %mul = fmul double %x, 0.25
; Multiply a double by 0.5. One shared set of checks covers every run line and
; expects a plain v_mul_f64 with 0.5 written directly as the operand, so no
; ldexp is expected here.
1437 define double @v_mul_half_f64(double %x) {
1438 ; GCN-LABEL: v_mul_half_f64:
1440 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1441 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
1442 ; GCN-NEXT: s_setpc_b64 s[30:31]
1443 %mul = fmul double %x, 0.5
; Multiply a double by 1.0. The shared checks expect no arithmetic instruction
; at all for any run line: only the s_waitcnt followed by the return.
1447 define double @v_mul_1_f64(double %x) {
1448 ; GCN-LABEL: v_mul_1_f64:
1450 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451 ; GCN-NEXT: s_setpc_b64 s[30:31]
1452 %mul = fmul double %x, 1.0
; Multiply a double by 2.0. The SelectionDAG checks expect the input added to
; itself with v_add_f64; the GlobalISel checks expect v_mul_f64 with 2.0 as the
; operand. Neither selector is expected to produce ldexp here.
1456 define double @v_mul_2_f64(double %x) {
1457 ; GFX9-SDAG-LABEL: v_mul_2_f64:
1458 ; GFX9-SDAG: ; %bb.0:
1459 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
1461 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1463 ; GFX9-GISEL-LABEL: v_mul_2_f64:
1464 ; GFX9-GISEL: ; %bb.0:
1465 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
1467 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1469 ; GFX10-SDAG-LABEL: v_mul_2_f64:
1470 ; GFX10-SDAG: ; %bb.0:
1471 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
1473 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1475 ; GFX10-GISEL-LABEL: v_mul_2_f64:
1476 ; GFX10-GISEL: ; %bb.0:
1477 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1478 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
1479 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1481 ; GFX11-SDAG-LABEL: v_mul_2_f64:
1482 ; GFX11-SDAG: ; %bb.0:
1483 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
1485 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1487 ; GFX11-GISEL-LABEL: v_mul_2_f64:
1488 ; GFX11-GISEL: ; %bb.0:
1489 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
1491 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1492 %mul = fmul double %x, 2.0
; Multiply a double by 4.0. One shared set of checks covers every run line and
; expects a plain v_mul_f64 with 4.0 written directly as the operand, so no
; ldexp is expected here.
1496 define double @v_mul_4_f64(double %x) {
1497 ; GCN-LABEL: v_mul_4_f64:
1499 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500 ; GCN-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1501 ; GCN-NEXT: s_setpc_b64 s[30:31]
1502 %mul = fmul double %x, 4.0
; Multiply a double by 8.0, i.e. 2^3. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 3; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40200000.
1506 define double @v_mul_8_f64(double %x) {
1507 ; GFX9-SDAG-LABEL: v_mul_8_f64:
1508 ; GFX9-SDAG: ; %bb.0:
1509 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3
1511 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1513 ; GFX9-GISEL-LABEL: v_mul_8_f64:
1514 ; GFX9-GISEL: ; %bb.0:
1515 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1517 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40200000
1518 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1519 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1521 ; GFX10-SDAG-LABEL: v_mul_8_f64:
1522 ; GFX10-SDAG: ; %bb.0:
1523 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1524 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3
1525 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1527 ; GFX10-GISEL-LABEL: v_mul_8_f64:
1528 ; GFX10-GISEL: ; %bb.0:
1529 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, v[0:1]
1531 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1533 ; GFX11-SDAG-LABEL: v_mul_8_f64:
1534 ; GFX11-SDAG: ; %bb.0:
1535 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1536 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3
1537 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1539 ; GFX11-GISEL-LABEL: v_mul_8_f64:
1540 ; GFX11-GISEL: ; %bb.0:
1541 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1542 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, v[0:1]
1543 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1544 %mul = fmul double %x, 8.0
; Multiply a double by 16.0, i.e. 2^4. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 4; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40300000.
1548 define double @v_mul_16_f64(double %x) {
1549 ; GFX9-SDAG-LABEL: v_mul_16_f64:
1550 ; GFX9-SDAG: ; %bb.0:
1551 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1552 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
1553 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1555 ; GFX9-GISEL-LABEL: v_mul_16_f64:
1556 ; GFX9-GISEL: ; %bb.0:
1557 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1558 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1559 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40300000
1560 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1561 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1563 ; GFX10-SDAG-LABEL: v_mul_16_f64:
1564 ; GFX10-SDAG: ; %bb.0:
1565 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
1567 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1569 ; GFX10-GISEL-LABEL: v_mul_16_f64:
1570 ; GFX10-GISEL: ; %bb.0:
1571 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1572 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1]
1573 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1575 ; GFX11-SDAG-LABEL: v_mul_16_f64:
1576 ; GFX11-SDAG: ; %bb.0:
1577 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1578 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
1579 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1581 ; GFX11-GISEL-LABEL: v_mul_16_f64:
1582 ; GFX11-GISEL: ; %bb.0:
1583 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1584 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1]
1585 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1586 %mul = fmul double %x, 16.0
; Multiply a double by 32.0, i.e. 2^5. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 5; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40400000.
1590 define double @v_mul_32_f64(double %x) {
1591 ; GFX9-SDAG-LABEL: v_mul_32_f64:
1592 ; GFX9-SDAG: ; %bb.0:
1593 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
1595 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1597 ; GFX9-GISEL-LABEL: v_mul_32_f64:
1598 ; GFX9-GISEL: ; %bb.0:
1599 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1601 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000
1602 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1603 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1605 ; GFX10-SDAG-LABEL: v_mul_32_f64:
1606 ; GFX10-SDAG: ; %bb.0:
1607 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
1609 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1611 ; GFX10-GISEL-LABEL: v_mul_32_f64:
1612 ; GFX10-GISEL: ; %bb.0:
1613 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1614 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1]
1615 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1617 ; GFX11-SDAG-LABEL: v_mul_32_f64:
1618 ; GFX11-SDAG: ; %bb.0:
1619 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
1621 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1623 ; GFX11-GISEL-LABEL: v_mul_32_f64:
1624 ; GFX11-GISEL: ; %bb.0:
1625 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1]
1627 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1628 %mul = fmul double %x, 32.0
; Multiply a double by 64.0, i.e. 2^6. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 6; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40500000.
1632 define double @v_mul_64_f64(double %x) {
1633 ; GFX9-SDAG-LABEL: v_mul_64_f64:
1634 ; GFX9-SDAG: ; %bb.0:
1635 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6
1637 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1639 ; GFX9-GISEL-LABEL: v_mul_64_f64:
1640 ; GFX9-GISEL: ; %bb.0:
1641 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1642 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1643 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40500000
1644 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1645 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1647 ; GFX10-SDAG-LABEL: v_mul_64_f64:
1648 ; GFX10-SDAG: ; %bb.0:
1649 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1650 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6
1651 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1653 ; GFX10-GISEL-LABEL: v_mul_64_f64:
1654 ; GFX10-GISEL: ; %bb.0:
1655 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40500000, v[0:1]
1657 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1659 ; GFX11-SDAG-LABEL: v_mul_64_f64:
1660 ; GFX11-SDAG: ; %bb.0:
1661 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6
1663 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1665 ; GFX11-GISEL-LABEL: v_mul_64_f64:
1666 ; GFX11-GISEL: ; %bb.0:
1667 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40500000, v[0:1]
1669 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1670 %mul = fmul double %x, 64.0
; Multiply a double by 128.0, i.e. 2^7. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 7; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40600000.
1674 define double @v_mul_128_f64(double %x) {
1675 ; GFX9-SDAG-LABEL: v_mul_128_f64:
1676 ; GFX9-SDAG: ; %bb.0:
1677 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1678 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7
1679 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1681 ; GFX9-GISEL-LABEL: v_mul_128_f64:
1682 ; GFX9-GISEL: ; %bb.0:
1683 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1684 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1685 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40600000
1686 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1687 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1689 ; GFX10-SDAG-LABEL: v_mul_128_f64:
1690 ; GFX10-SDAG: ; %bb.0:
1691 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1692 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7
1693 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1695 ; GFX10-GISEL-LABEL: v_mul_128_f64:
1696 ; GFX10-GISEL: ; %bb.0:
1697 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40600000, v[0:1]
1699 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1701 ; GFX11-SDAG-LABEL: v_mul_128_f64:
1702 ; GFX11-SDAG: ; %bb.0:
1703 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7
1705 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1707 ; GFX11-GISEL-LABEL: v_mul_128_f64:
1708 ; GFX11-GISEL: ; %bb.0:
1709 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1710 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40600000, v[0:1]
1711 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1712 %mul = fmul double %x, 128.0
; Multiply a double by 256.0, i.e. 2^8. The SelectionDAG checks expect
; v_ldexp_f64 with exponent 8; the GlobalISel checks still expect v_mul_f64 by
; the constant, whose upper 32 bits are 0x40700000.
1716 define double @v_mul_256_f64(double %x) {
1717 ; GFX9-SDAG-LABEL: v_mul_256_f64:
1718 ; GFX9-SDAG: ; %bb.0:
1719 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1720 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8
1721 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1723 ; GFX9-GISEL-LABEL: v_mul_256_f64:
1724 ; GFX9-GISEL: ; %bb.0:
1725 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1727 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40700000
1728 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1729 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1731 ; GFX10-SDAG-LABEL: v_mul_256_f64:
1732 ; GFX10-SDAG: ; %bb.0:
1733 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1734 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8
1735 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1737 ; GFX10-GISEL-LABEL: v_mul_256_f64:
1738 ; GFX10-GISEL: ; %bb.0:
1739 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1740 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40700000, v[0:1]
1741 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1743 ; GFX11-SDAG-LABEL: v_mul_256_f64:
1744 ; GFX11-SDAG: ; %bb.0:
1745 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1746 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8
1747 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1749 ; GFX11-GISEL-LABEL: v_mul_256_f64:
1750 ; GFX11-GISEL: ; %bb.0:
1751 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1752 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40700000, v[0:1]
1753 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1754 %mul = fmul double %x, 256.0
; Multiply a double by 2^63 (9223372036854775808.0). The SelectionDAG checks
; expect v_ldexp_f64 with 63 written directly as the exponent operand; the
; GlobalISel checks still expect v_mul_f64 by the constant, whose upper 32 bits
; are 0x43e00000.
1759 define double @v_mul_0x1p63_f64(double %x) {
1760 ; GFX9-SDAG-LABEL: v_mul_0x1p63_f64:
1761 ; GFX9-SDAG: ; %bb.0:
1762 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1763 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63
1764 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1766 ; GFX9-GISEL-LABEL: v_mul_0x1p63_f64:
1767 ; GFX9-GISEL: ; %bb.0:
1768 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1769 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1770 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43e00000
1771 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1772 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1774 ; GFX10-SDAG-LABEL: v_mul_0x1p63_f64:
1775 ; GFX10-SDAG: ; %bb.0:
1776 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1777 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63
1778 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1780 ; GFX10-GISEL-LABEL: v_mul_0x1p63_f64:
1781 ; GFX10-GISEL: ; %bb.0:
1782 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x43e00000, v[0:1]
1784 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1786 ; GFX11-SDAG-LABEL: v_mul_0x1p63_f64:
1787 ; GFX11-SDAG: ; %bb.0:
1788 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63
1790 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1792 ; GFX11-GISEL-LABEL: v_mul_0x1p63_f64:
1793 ; GFX11-GISEL: ; %bb.0:
1794 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x43e00000, v[0:1]
1796 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1797 %mul = fmul double %x, 9223372036854775808.0
; Multiply a double by 2^64 (18446744073709551616.0). The SelectionDAG checks
; expect v_ldexp_f64 with 64 written directly as the exponent operand; the
; GlobalISel checks still expect v_mul_f64 by the constant, whose upper 32 bits
; are 0x43f00000.
1802 define double @v_mul_0x1p64_f64(double %x) {
1803 ; GFX9-SDAG-LABEL: v_mul_0x1p64_f64:
1804 ; GFX9-SDAG: ; %bb.0:
1805 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1806 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
1807 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1809 ; GFX9-GISEL-LABEL: v_mul_0x1p64_f64:
1810 ; GFX9-GISEL: ; %bb.0:
1811 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1813 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43f00000
1814 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1815 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1817 ; GFX10-SDAG-LABEL: v_mul_0x1p64_f64:
1818 ; GFX10-SDAG: ; %bb.0:
1819 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
1821 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1823 ; GFX10-GISEL-LABEL: v_mul_0x1p64_f64:
1824 ; GFX10-GISEL: ; %bb.0:
1825 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x43f00000, v[0:1]
1827 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1829 ; GFX11-SDAG-LABEL: v_mul_0x1p64_f64:
1830 ; GFX11-SDAG: ; %bb.0:
1831 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
1833 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1835 ; GFX11-GISEL-LABEL: v_mul_0x1p64_f64:
1836 ; GFX11-GISEL: ; %bb.0:
1837 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x43f00000, v[0:1]
1839 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1840 %mul = fmul double %x, 18446744073709551616.0
; Multiply a double by 2^65 (36893488147419103232.0). The SelectionDAG runs are
; expected to emit v_ldexp_f64 with exponent 0x41 (65): the gfx900 run first
; moves it into s4 with s_movk_i32, while the gfx1030/gfx1100 runs use it as a
; literal operand. The GlobalISel runs keep a v_mul_f64 by the constant.
1845 define double @v_mul_0x1p65_f64(double %x) {
1846 ; GFX9-SDAG-LABEL: v_mul_0x1p65_f64:
1847 ; GFX9-SDAG: ; %bb.0:
1848 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1849 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x41
1850 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
1851 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1853 ; GFX9-GISEL-LABEL: v_mul_0x1p65_f64:
1854 ; GFX9-GISEL: ; %bb.0:
1855 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1856 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1857 ; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v3, 34
1858 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1859 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1861 ; GFX10-SDAG-LABEL: v_mul_0x1p65_f64:
1862 ; GFX10-SDAG: ; %bb.0:
1863 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x41
1865 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1867 ; GFX10-GISEL-LABEL: v_mul_0x1p65_f64:
1868 ; GFX10-GISEL: ; %bb.0:
1869 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, v[0:1]
1871 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1873 ; GFX11-SDAG-LABEL: v_mul_0x1p65_f64:
1874 ; GFX11-SDAG: ; %bb.0:
1875 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1876 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x41
1877 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1879 ; GFX11-GISEL-LABEL: v_mul_0x1p65_f64:
1880 ; GFX11-GISEL: ; %bb.0:
1881 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1882 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, v[0:1]
1883 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1884 %mul = fmul double %x, 36893488147419103232.0
; amdgpu_ps variant of the 2^65 multiply with the operand passed inreg (it shows
; up as s[0:1] in the expected output). The product is bitcast to <2 x i32> and
; each half is returned through llvm.amdgcn.readfirstlane. %y is not referenced
; by the body.
1888 define amdgpu_ps <2 x i32> @s_mul_0x1p65_f64(double inreg %x, double inreg %y) {
1889 ; GFX9-SDAG-LABEL: s_mul_0x1p65_f64:
1890 ; GFX9-SDAG: ; %bb.0:
1891 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x41
1892 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
1893 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s0, v0
1894 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s1, v1
1895 ; GFX9-SDAG-NEXT: ; return to shader part epilog
1897 ; GFX9-GISEL-LABEL: s_mul_0x1p65_f64:
1898 ; GFX9-GISEL: ; %bb.0:
1899 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
1900 ; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v1, 34
1901 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
1902 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s0, v0
1903 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1
1904 ; GFX9-GISEL-NEXT: ; return to shader part epilog
1906 ; GFX10-SDAG-LABEL: s_mul_0x1p65_f64:
1907 ; GFX10-SDAG: ; %bb.0:
1908 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 0x41
1909 ; GFX10-SDAG-NEXT: v_readfirstlane_b32 s0, v0
1910 ; GFX10-SDAG-NEXT: v_readfirstlane_b32 s1, v1
1911 ; GFX10-SDAG-NEXT: ; return to shader part epilog
1913 ; GFX10-GISEL-LABEL: s_mul_0x1p65_f64:
1914 ; GFX10-GISEL: ; %bb.0:
1915 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, s[0:1]
1916 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
1917 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
1918 ; GFX10-GISEL-NEXT: ; return to shader part epilog
1920 ; GFX11-SDAG-LABEL: s_mul_0x1p65_f64:
1921 ; GFX11-SDAG: ; %bb.0:
1922 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 0x41
1923 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
1924 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1
1925 ; GFX11-SDAG-NEXT: ; return to shader part epilog
1927 ; GFX11-GISEL-LABEL: s_mul_0x1p65_f64:
1928 ; GFX11-GISEL: ; %bb.0:
1929 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, s[0:1]
1930 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
1931 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
1932 ; GFX11-GISEL-NEXT: ; return to shader part epilog
1933 %mul = fmul contract double %x, 36893488147419103232.0
1934 %cast = bitcast double %mul to <2 x i32>
1935 %cast.0 = extractelement <2 x i32> %cast, i32 0
1936 %cast.1 = extractelement <2 x i32> %cast, i32 1
1937 %readlane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
1938 %readlane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
1939 %insert.0 = insertelement <2 x i32> poison, i32 %readlane.0, i32 0
1940 %insert.1 = insertelement <2 x i32> %insert.0, i32 %readlane.1, i32 1
1941 ret <2 x i32> %insert.1
; Multiply a double by 2^128 (3.40282366920938463463e+38). The SelectionDAG runs
; are expected to emit v_ldexp_f64 with exponent 0x80 (128); the GlobalISel runs
; keep a v_mul_f64 by the constant (high dword 0x47f00000).
1945 define double @v_mul_0x1p128_f64(double %x) {
1946 ; GFX9-SDAG-LABEL: v_mul_0x1p128_f64:
1947 ; GFX9-SDAG: ; %bb.0:
1948 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1949 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x80
1950 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
1951 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1953 ; GFX9-GISEL-LABEL: v_mul_0x1p128_f64:
1954 ; GFX9-GISEL: ; %bb.0:
1955 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1956 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
1957 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x47f00000
1958 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
1959 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1961 ; GFX10-SDAG-LABEL: v_mul_0x1p128_f64:
1962 ; GFX10-SDAG: ; %bb.0:
1963 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x80
1965 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1967 ; GFX10-GISEL-LABEL: v_mul_0x1p128_f64:
1968 ; GFX10-GISEL: ; %bb.0:
1969 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1970 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x47f00000, v[0:1]
1971 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1973 ; GFX11-SDAG-LABEL: v_mul_0x1p128_f64:
1974 ; GFX11-SDAG: ; %bb.0:
1975 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1976 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x80
1977 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1979 ; GFX11-GISEL-LABEL: v_mul_0x1p128_f64:
1980 ; GFX11-GISEL: ; %bb.0:
1981 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x47f00000, v[0:1]
1983 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1984 %mul = fmul double %x, 3.40282366920938463463e+38
; Multiply a double by 2^1022 (4.49423283715578976932e+307). The SelectionDAG
; runs are expected to emit v_ldexp_f64 with exponent 0x3fe (1022); the
; GlobalISel runs keep a v_mul_f64 by the constant (high dword 0x7fd00000).
1989 define double @v_mul_0x1p1022_f64(double %x) {
1990 ; GFX9-SDAG-LABEL: v_mul_0x1p1022_f64:
1991 ; GFX9-SDAG: ; %bb.0:
1992 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x3fe
1994 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
1995 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1997 ; GFX9-GISEL-LABEL: v_mul_0x1p1022_f64:
1998 ; GFX9-GISEL: ; %bb.0:
1999 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2000 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
2001 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fd00000
2002 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
2003 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2005 ; GFX10-SDAG-LABEL: v_mul_0x1p1022_f64:
2006 ; GFX10-SDAG: ; %bb.0:
2007 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3fe
2009 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2011 ; GFX10-GISEL-LABEL: v_mul_0x1p1022_f64:
2012 ; GFX10-GISEL: ; %bb.0:
2013 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2014 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fd00000, v[0:1]
2015 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2017 ; GFX11-SDAG-LABEL: v_mul_0x1p1022_f64:
2018 ; GFX11-SDAG: ; %bb.0:
2019 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3fe
2021 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2023 ; GFX11-GISEL-LABEL: v_mul_0x1p1022_f64:
2024 ; GFX11-GISEL: ; %bb.0:
2025 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2026 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fd00000, v[0:1]
2027 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2028 %mul = fmul double %x, 4.49423283715578976932e+307
; Multiply a double by 2^1023 (8.98846567431157953865e+307). The SelectionDAG
; runs are expected to emit v_ldexp_f64 with exponent 0x3ff (1023); the
; GlobalISel runs keep a v_mul_f64 by the constant (high dword 0x7fe00000).
2033 define double @v_mul_0x1p1023_f64(double %x) {
2034 ; GFX9-SDAG-LABEL: v_mul_0x1p1023_f64:
2035 ; GFX9-SDAG: ; %bb.0:
2036 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2037 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x3ff
2038 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4
2039 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2041 ; GFX9-GISEL-LABEL: v_mul_0x1p1023_f64:
2042 ; GFX9-GISEL: ; %bb.0:
2043 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
2045 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fe00000
2046 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
2047 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2049 ; GFX10-SDAG-LABEL: v_mul_0x1p1023_f64:
2050 ; GFX10-SDAG: ; %bb.0:
2051 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3ff
2053 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2055 ; GFX10-GISEL-LABEL: v_mul_0x1p1023_f64:
2056 ; GFX10-GISEL: ; %bb.0:
2057 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2058 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fe00000, v[0:1]
2059 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2061 ; GFX11-SDAG-LABEL: v_mul_0x1p1023_f64:
2062 ; GFX11-SDAG: ; %bb.0:
2063 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2064 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3ff
2065 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2067 ; GFX11-GISEL-LABEL: v_mul_0x1p1023_f64:
2068 ; GFX11-GISEL: ; %bb.0:
2069 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fe00000, v[0:1]
2071 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2072 %mul = fmul double %x, 8.98846567431157953865e+307
2076 ; Check that this doesn't interfere with fma formation
; fmul by 32.0 followed by fadd, both carrying the contract flag. Every run is
; expected to fuse the pair into a single v_fma_f64 with the constant 32.0
; (high dword 0x40400000) instead of emitting an ldexp plus an add.
2077 define double @v_fma_mul_add_32_f64(double %x, double %y) {
2078 ; GFX9-SDAG-LABEL: v_fma_mul_add_32_f64:
2079 ; GFX9-SDAG: ; %bb.0:
2080 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2081 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
2082 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000
2083 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3]
2084 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2086 ; GFX9-GISEL-LABEL: v_fma_mul_add_32_f64:
2087 ; GFX9-GISEL: ; %bb.0:
2088 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2089 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
2090 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000
2091 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
2092 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2094 ; GFX1011-LABEL: v_fma_mul_add_32_f64:
2096 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097 ; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[2:3]
2098 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
2099 %mul = fmul contract double %x, 32.0
2100 %fma = fadd contract double %mul, %y
; <2 x double> version of the contract fmul-by-32.0 + fadd case. The expected
; output is one v_fma_f64 per element, each using the 32.0 constant (high dword
; 0x40400000).
2104 define <2 x double> @v_fma_mul_add_32_v2f64(<2 x double> %x, <2 x double> %y) {
2105 ; GFX9-LABEL: v_fma_mul_add_32_v2f64:
2107 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2108 ; GFX9-NEXT: s_mov_b32 s4, 0
2109 ; GFX9-NEXT: s_mov_b32 s5, 0x40400000
2110 ; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[4:5]
2111 ; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7]
2112 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2114 ; GFX10-SDAG-LABEL: v_fma_mul_add_32_v2f64:
2115 ; GFX10-SDAG: ; %bb.0:
2116 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2117 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5]
2118 ; GFX10-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7]
2119 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2121 ; GFX10-GISEL-LABEL: v_fma_mul_add_32_v2f64:
2122 ; GFX10-GISEL: ; %bb.0:
2123 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5]
2125 ; GFX10-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7]
2126 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2128 ; GFX11-SDAG-LABEL: v_fma_mul_add_32_v2f64:
2129 ; GFX11-SDAG: ; %bb.0:
2130 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2131 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5]
2132 ; GFX11-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7]
2133 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2135 ; GFX11-GISEL-LABEL: v_fma_mul_add_32_v2f64:
2136 ; GFX11-GISEL: ; %bb.0:
2137 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2138 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5]
2139 ; GFX11-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7]
2140 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2141 %mul = fmul contract <2 x double> %x, <double 32.0, double 32.0>
2142 %fma = fadd contract <2 x double> %mul, %y
2143 ret <2 x double> %fma
; <2 x double> contract fmul by 2.0 + fadd. All runs share one expected output:
; a v_fma_f64 per element with 2.0 written directly as an operand.
2146 define <2 x double> @v_fma_mul_add_2_v2f64(<2 x double> %x, <2 x double> %y) {
2147 ; GCN-LABEL: v_fma_mul_add_2_v2f64:
2149 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150 ; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], 2.0, v[4:5]
2151 ; GCN-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[6:7]
2152 ; GCN-NEXT: s_setpc_b64 s[30:31]
2153 %mul = fmul contract <2 x double> %x, <double 2.0, double 2.0>
2154 %fma = fadd contract <2 x double> %mul, %y
2155 ret <2 x double> %fma
; <2 x double> contract fmul by 4.0 + fadd. All runs share one expected output:
; a v_fma_f64 per element with 4.0 written directly as an operand.
2158 define <2 x double> @v_fma_mul_add_4_v2f64(<2 x double> %x, <2 x double> %y) {
2159 ; GCN-LABEL: v_fma_mul_add_4_v2f64:
2161 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2162 ; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], 4.0, v[4:5]
2163 ; GCN-NEXT: v_fma_f64 v[2:3], v[2:3], 4.0, v[6:7]
2164 ; GCN-NEXT: s_setpc_b64 s[30:31]
2165 %mul = fmul contract <2 x double> %x, <double 4.0, double 4.0>
2166 %fma = fadd contract <2 x double> %mul, %y
2167 ret <2 x double> %fma
; Scalar contract fmul by 2.0 + fadd. All runs share one expected output: a
; single v_fma_f64 with 2.0 written directly as an operand.
2170 define double @v_fma_mul_add_2_f64(double %x, double %y) {
2171 ; GCN-LABEL: v_fma_mul_add_2_f64:
2173 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2174 ; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], 2.0, v[2:3]
2175 ; GCN-NEXT: s_setpc_b64 s[30:31]
2176 %mul = fmul contract double %x, 2.0
2177 %fma = fadd contract double %mul, %y
; Scalar contract fmul by 4.0 + fadd. All runs share one expected output: a
; single v_fma_f64 with 4.0 written directly as an operand.
2181 define double @v_fma_mul_add_4_f64(double %x, double %y) {
2182 ; GCN-LABEL: v_fma_mul_add_4_f64:
2184 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2185 ; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], 4.0, v[2:3]
2186 ; GCN-NEXT: s_setpc_b64 s[30:31]
2187 %mul = fmul contract double %x, 4.0
2188 %fma = fadd contract double %mul, %y
; Scalar contract fmul by -4.0 + fadd. All runs share one expected output: a
; single v_fma_f64 with -4.0 written directly as an operand.
2192 define double @v_fma_mul_add_neg4_f64(double %x, double %y) {
2193 ; GCN-LABEL: v_fma_mul_add_neg4_f64:
2195 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196 ; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], -4.0, v[2:3]
2197 ; GCN-NEXT: s_setpc_b64 s[30:31]
2198 %mul = fmul contract double %x, -4.0
2199 %fma = fadd contract double %mul, %y
; fmul by 32.0 followed by fadd with no contract flag on either instruction.
; The expected output keeps two instructions: the SelectionDAG runs emit
; v_ldexp_f64 by 5 and the GlobalISel runs emit v_mul_f64 by the constant,
; each followed by a separate v_add_f64.
2203 define double @v_mul_add_32_f64(double %x, double %y) {
2204 ; GFX9-SDAG-LABEL: v_mul_add_32_f64:
2205 ; GFX9-SDAG: ; %bb.0:
2206 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2207 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
2208 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2209 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2211 ; GFX9-GISEL-LABEL: v_mul_add_32_f64:
2212 ; GFX9-GISEL: ; %bb.0:
2213 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
2215 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000
2216 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
2217 ; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2218 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2220 ; GFX10-SDAG-LABEL: v_mul_add_32_f64:
2221 ; GFX10-SDAG: ; %bb.0:
2222 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
2224 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2225 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2227 ; GFX10-GISEL-LABEL: v_mul_add_32_f64:
2228 ; GFX10-GISEL: ; %bb.0:
2229 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2230 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1]
2231 ; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2232 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2234 ; GFX11-SDAG-LABEL: v_mul_add_32_f64:
2235 ; GFX11-SDAG: ; %bb.0:
2236 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2237 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
2238 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2239 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2241 ; GFX11-GISEL-LABEL: v_mul_add_32_f64:
2242 ; GFX11-GISEL: ; %bb.0:
2243 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1]
2245 ; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2246 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2247 %mul = fmul double %x, 32.0
2248 %fma = fadd double %mul, %y
; fmul by 2.0 followed by fadd with no contract flag on either instruction.
; The expected output keeps two instructions: the SelectionDAG runs emit the
; doubling as v_add_f64 of the input with itself, the GlobalISel runs emit
; v_mul_f64 by 2.0; both are followed by the v_add_f64 with %y.
2252 define double @v_mul_add_2_f64(double %x, double %y) {
2253 ; GFX9-SDAG-LABEL: v_mul_add_2_f64:
2254 ; GFX9-SDAG: ; %bb.0:
2255 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2256 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
2257 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2258 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2260 ; GFX9-GISEL-LABEL: v_mul_add_2_f64:
2261 ; GFX9-GISEL: ; %bb.0:
2262 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
2264 ; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2265 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2267 ; GFX10-SDAG-LABEL: v_mul_add_2_f64:
2268 ; GFX10-SDAG: ; %bb.0:
2269 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
2271 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2272 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2274 ; GFX10-GISEL-LABEL: v_mul_add_2_f64:
2275 ; GFX10-GISEL: ; %bb.0:
2276 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2277 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
2278 ; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2279 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2281 ; GFX11-SDAG-LABEL: v_mul_add_2_f64:
2282 ; GFX11-SDAG: ; %bb.0:
2283 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
2285 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2286 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2288 ; GFX11-GISEL-LABEL: v_mul_add_2_f64:
2289 ; GFX11-GISEL: ; %bb.0:
2290 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0
2292 ; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2293 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2294 %mul = fmul double %x, 2.0
2295 %fma = fadd double %mul, %y
; Non-contract counterpart of v_fma_mul_add_4_f64: multiply by 4.0, then add %y
; as a separate instruction (neither operation carries the contract flag).
; NOTE(review): the multiplier used to be 2.0, which made this test a duplicate
; of v_mul_add_2_f64. The assertions below were adjusted by hand for 4.0;
; re-run utils/update_llc_test_checks.py to confirm them.
2299 define double @v_mul_add_4_f64(double %x, double %y) {
2300 ; GFX9-SDAG-LABEL: v_mul_add_4_f64:
2301 ; GFX9-SDAG: ; %bb.0:
2302 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2303 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2304 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2305 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2307 ; GFX9-GISEL-LABEL: v_mul_add_4_f64:
2308 ; GFX9-GISEL: ; %bb.0:
2309 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2311 ; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2312 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2314 ; GFX10-SDAG-LABEL: v_mul_add_4_f64:
2315 ; GFX10-SDAG: ; %bb.0:
2316 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2317 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2318 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2319 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2321 ; GFX10-GISEL-LABEL: v_mul_add_4_f64:
2322 ; GFX10-GISEL: ; %bb.0:
2323 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2324 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2325 ; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2326 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2328 ; GFX11-SDAG-LABEL: v_mul_add_4_f64:
2329 ; GFX11-SDAG: ; %bb.0:
2330 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2332 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2333 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2335 ; GFX11-GISEL-LABEL: v_mul_add_4_f64:
2336 ; GFX11-GISEL: ; %bb.0:
2337 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2338 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2339 ; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2340 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2341 %mul = fmul double %x, 4.0
2342 %fma = fadd double %mul, %y
; Contract fmul by 32.0 followed by contract fsub of %y. Every run is expected
; to emit a single v_fma_f64 whose addend carries a negate modifier (-v[2:3]).
2346 define double @v_fma_mul_sub_32_f64(double %x, double %y) {
2347 ; GFX9-SDAG-LABEL: v_fma_mul_sub_32_f64:
2348 ; GFX9-SDAG: ; %bb.0:
2349 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
2351 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000
2352 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], -v[2:3]
2353 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2355 ; GFX9-GISEL-LABEL: v_fma_mul_sub_32_f64:
2356 ; GFX9-GISEL: ; %bb.0:
2357 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
2359 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000
2360 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], -v[2:3]
2361 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2363 ; GFX1011-LABEL: v_fma_mul_sub_32_f64:
2365 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366 ; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], -v[2:3]
2367 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
2368 %mul = fmul contract double %x, 32.0
2369 %fma = fsub contract double %mul, %y
; Contract fmul by -32.0 followed by contract fadd. Every run is expected to
; emit a single v_fma_f64 using the negative constant (high dword 0xc0400000).
2373 define double @v_fma_mul_add_neg32_f64(double %x, double %y) {
2374 ; GFX9-SDAG-LABEL: v_fma_mul_add_neg32_f64:
2375 ; GFX9-SDAG: ; %bb.0:
2376 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
2378 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc0400000
2379 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3]
2380 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2382 ; GFX9-GISEL-LABEL: v_fma_mul_add_neg32_f64:
2383 ; GFX9-GISEL: ; %bb.0:
2384 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
2386 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0400000
2387 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
2388 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2390 ; GFX1011-LABEL: v_fma_mul_add_neg32_f64:
2392 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2393 ; GFX1011-NEXT: v_fma_f64 v[0:1], 0xc0400000, v[0:1], v[2:3]
2394 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
2395 %mul = fmul contract double %x, -32.0
2396 %fma = fadd contract double %mul, %y
; Multiply fabs(%x) by 32.0. In every expected output the fabs appears as an
; |v[0:1]| source modifier on the multiply: v_ldexp_f64 by 5 for the
; SelectionDAG runs, v_mul_f64 by the constant for the GlobalISel runs.
2400 define double @v_mul_fabs_32_f64(double %x) {
2401 ; GFX9-SDAG-LABEL: v_mul_fabs_32_f64:
2402 ; GFX9-SDAG: ; %bb.0:
2403 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5
2405 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2407 ; GFX9-GISEL-LABEL: v_mul_fabs_32_f64:
2408 ; GFX9-GISEL: ; %bb.0:
2409 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2410 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
2411 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000
2412 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
2413 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2415 ; GFX10-SDAG-LABEL: v_mul_fabs_32_f64:
2416 ; GFX10-SDAG: ; %bb.0:
2417 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5
2419 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2421 ; GFX10-GISEL-LABEL: v_mul_fabs_32_f64:
2422 ; GFX10-GISEL: ; %bb.0:
2423 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2424 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, |v[0:1]|
2425 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2427 ; GFX11-SDAG-LABEL: v_mul_fabs_32_f64:
2428 ; GFX11-SDAG: ; %bb.0:
2429 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2430 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5
2431 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2433 ; GFX11-GISEL-LABEL: v_mul_fabs_32_f64:
2434 ; GFX11-GISEL: ; %bb.0:
2435 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2436 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, |v[0:1]|
2437 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2438 %x.fabs = call double @llvm.fabs.f64(double %x)
2439 %mul = fmul double %x.fabs, 32.0
; Contract fmul of fabs(%x) by 32.0 followed by contract fadd. Every run is
; expected to emit a single v_fma_f64 with the fabs carried as an |v[0:1]|
; source modifier.
2443 define double @v_mul_add_fma_fabs_32_f64(double %x, double %y) {
2444 ; GFX9-SDAG-LABEL: v_mul_add_fma_fabs_32_f64:
2445 ; GFX9-SDAG: ; %bb.0:
2446 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
2448 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000
2449 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], |v[0:1]|, s[4:5], v[2:3]
2450 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2452 ; GFX9-GISEL-LABEL: v_mul_add_fma_fabs_32_f64:
2453 ; GFX9-GISEL: ; %bb.0:
2454 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
2456 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000
2457 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], |v[0:1]|, v[4:5], v[2:3]
2458 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2460 ; GFX1011-LABEL: v_mul_add_fma_fabs_32_f64:
2462 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2463 ; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, |v[0:1]|, v[2:3]
2464 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
2465 %x.fabs = call double @llvm.fabs.f64(double %x)
2466 %mul = fmul contract double %x.fabs, 32.0
2467 %fma = fadd contract double %mul, %y
; <2 x double> multiply by a splat of 16.0. The expected output handles each
; element separately: v_ldexp_f64 by 4 for the SelectionDAG runs, v_mul_f64 by
; the constant (high dword 0x40300000) for the GlobalISel runs.
2471 define <2 x double> @v_mul_16_v2f64(<2 x double> %x) {
2472 ; GFX9-SDAG-LABEL: v_mul_16_v2f64:
2473 ; GFX9-SDAG: ; %bb.0:
2474 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
2476 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4
2477 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2479 ; GFX9-GISEL-LABEL: v_mul_16_v2f64:
2480 ; GFX9-GISEL: ; %bb.0:
2481 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2482 ; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
2483 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000
2484 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
2485 ; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
2486 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2488 ; GFX10-SDAG-LABEL: v_mul_16_v2f64:
2489 ; GFX10-SDAG: ; %bb.0:
2490 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2491 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
2492 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4
2493 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2495 ; GFX10-GISEL-LABEL: v_mul_16_v2f64:
2496 ; GFX10-GISEL: ; %bb.0:
2497 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000
2499 ; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000
2500 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2502 ; GFX11-SDAG-LABEL: v_mul_16_v2f64:
2503 ; GFX11-SDAG: ; %bb.0:
2504 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2505 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4
2506 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4
2507 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2509 ; GFX11-GISEL-LABEL: v_mul_16_v2f64:
2510 ; GFX11-GISEL: ; %bb.0:
2511 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2512 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000
2513 ; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000
2514 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2515 %mul = fmul <2 x double> %x, <double 16.0, double 16.0>
2516 ret <2 x double> %mul
; <2 x double> multiply by a splat of -16.0. The SelectionDAG runs are expected
; to emit v_ldexp_f64 by 4 with a negate modifier on the source (-v[...]); the
; GlobalISel runs emit v_mul_f64 by the negative constant (high dword
; 0xc0300000).
2519 define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) {
2520 ; GFX9-SDAG-LABEL: v_mul_neg16_v2f64:
2521 ; GFX9-SDAG: ; %bb.0:
2522 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
2524 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
2525 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2527 ; GFX9-GISEL-LABEL: v_mul_neg16_v2f64:
2528 ; GFX9-GISEL: ; %bb.0:
2529 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2530 ; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
2531 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
2532 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
2533 ; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
2534 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2536 ; GFX10-SDAG-LABEL: v_mul_neg16_v2f64:
2537 ; GFX10-SDAG: ; %bb.0:
2538 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
2540 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
2541 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2543 ; GFX10-GISEL-LABEL: v_mul_neg16_v2f64:
2544 ; GFX10-GISEL: ; %bb.0:
2545 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000
2547 ; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000
2548 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2550 ; GFX11-SDAG-LABEL: v_mul_neg16_v2f64:
2551 ; GFX11-SDAG: ; %bb.0:
2552 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
2554 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
2555 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2557 ; GFX11-GISEL-LABEL: v_mul_neg16_v2f64:
2558 ; GFX11-GISEL: ; %bb.0:
2559 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2560 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000
2561 ; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000
2562 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2563 %mul = fmul <2 x double> %x, <double -16.0, double -16.0>
2564 ret <2 x double> %mul
; <2 x double> multiply of fabs(%x) by a splat of 16.0. In every expected
; output the fabs appears as an |v[...]| source modifier: v_ldexp_f64 by 4 for
; the SelectionDAG runs, v_mul_f64 by the constant for the GlobalISel runs.
2567 define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) {
2568 ; GFX9-SDAG-LABEL: v_mul_fabs_16_v2f64:
2569 ; GFX9-SDAG: ; %bb.0:
2570 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2571 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4
2572 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4
2573 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2575 ; GFX9-GISEL-LABEL: v_mul_fabs_16_v2f64:
2576 ; GFX9-GISEL: ; %bb.0:
2577 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2578 ; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
2579 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000
2580 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
2581 ; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5]
2582 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2584 ; GFX10-SDAG-LABEL: v_mul_fabs_16_v2f64:
2585 ; GFX10-SDAG: ; %bb.0:
2586 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2587 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4
2588 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4
2589 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2591 ; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f64:
2592 ; GFX10-GISEL: ; %bb.0:
2593 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000
2595 ; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000
2596 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2598 ; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f64:
2599 ; GFX11-SDAG: ; %bb.0:
2600 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4
2602 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4
2603 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2605 ; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f64:
2606 ; GFX11-GISEL: ; %bb.0:
2607 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2608 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000
2609 ; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000
2610 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2611 %x.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
2612 %mul = fmul <2 x double> %x.fabs, <double 16.0, double 16.0>
2613 ret <2 x double> %mul
; amdgpu_ps variant of the multiply by 32.0 with the operand passed inreg (it
; shows up as s[0:1] in the expected output). The product is bitcast to
; <2 x i32> and each half is returned through llvm.amdgcn.readfirstlane. %y is
; not referenced by the body.
2616 define amdgpu_ps <2 x i32> @s_mul_32_f64(double inreg %x, double inreg %y) {
2617 ; GFX9-SDAG-LABEL: s_mul_32_f64:
2618 ; GFX9-SDAG: ; %bb.0:
2619 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5
2620 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s0, v0
2621 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s1, v1
2622 ; GFX9-SDAG-NEXT: ; return to shader part epilog
2624 ; GFX9-GISEL-LABEL: s_mul_32_f64:
2625 ; GFX9-GISEL: ; %bb.0:
2626 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2627 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000
2628 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
2629 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s0, v0
2630 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1
2631 ; GFX9-GISEL-NEXT: ; return to shader part epilog
2633 ; GFX10-SDAG-LABEL: s_mul_32_f64:
2634 ; GFX10-SDAG: ; %bb.0:
2635 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5
2636 ; GFX10-SDAG-NEXT: v_readfirstlane_b32 s0, v0
2637 ; GFX10-SDAG-NEXT: v_readfirstlane_b32 s1, v1
2638 ; GFX10-SDAG-NEXT: ; return to shader part epilog
2640 ; GFX10-GISEL-LABEL: s_mul_32_f64:
2641 ; GFX10-GISEL: ; %bb.0:
2642 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, s[0:1]
2643 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
2644 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
2645 ; GFX10-GISEL-NEXT: ; return to shader part epilog
2647 ; GFX11-SDAG-LABEL: s_mul_32_f64:
2648 ; GFX11-SDAG: ; %bb.0:
2649 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5
2650 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
2651 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1
2652 ; GFX11-SDAG-NEXT: ; return to shader part epilog
2654 ; GFX11-GISEL-LABEL: s_mul_32_f64:
2655 ; GFX11-GISEL: ; %bb.0:
2656 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, s[0:1]
2657 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
2658 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
2659 ; GFX11-GISEL-NEXT: ; return to shader part epilog
2660 %mul = fmul contract double %x, 32.0
2661 %cast = bitcast double %mul to <2 x i32>
2662 %cast.0 = extractelement <2 x i32> %cast, i32 0
2663 %cast.1 = extractelement <2 x i32> %cast, i32 1
2664 %readlane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
2665 %readlane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
2666 %insert.0 = insertelement <2 x i32> poison, i32 %readlane.0, i32 0
2667 %insert.1 = insertelement <2 x i32> %insert.0, i32 %readlane.1, i32 1
2668 ret <2 x i32> %insert.1
; f16 multiply by the constant 0xH0002, an f16 subnormal equal to 2^-23.
; All run lines share one expectation, a plain v_mul_f16 whose constant
; operand is printed as the integer 2 (the same bit pattern).
2672 define half @v_mul_0x1pn23_f16(half %x) {
2673 ; GCN-LABEL: v_mul_0x1pn23_f16:
2675 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676 ; GCN-NEXT: v_mul_f16_e32 v0, 2, v0
2677 ; GCN-NEXT: s_setpc_b64 s[30:31]
2678 %mul = fmul half %x, 0xH0002
; f16 multiply by 2^-17 (0.00000762939453125). The shared checks expect a
; plain v_mul_f16 with the literal 0x80.
2683 define half @v_mul_0x1pn17_f16(half %x) {
2684 ; GCN-LABEL: v_mul_0x1pn17_f16:
2686 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2687 ; GCN-NEXT: v_mul_f16_e32 v0, 0x80, v0
2688 ; GCN-NEXT: s_setpc_b64 s[30:31]
2689 %mul = fmul half %x, 0.00000762939453125
; f16 multiply by 2^-16 (0.0000152587890625). The shared checks expect a
; plain v_mul_f16 with the literal 0x100.
2694 define half @v_mul_0x1pn16_f16(half %x) {
2695 ; GCN-LABEL: v_mul_0x1pn16_f16:
2697 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2698 ; GCN-NEXT: v_mul_f16_e32 v0, 0x100, v0
2699 ; GCN-NEXT: s_setpc_b64 s[30:31]
2700 %mul = fmul half %x, 0.0000152587890625
; f16 multiply by 2^-15 (0.000030517578125). The shared checks expect a
; plain v_mul_f16 with the literal 0x200.
2705 define half @v_mul_0x1pn15_f16(half %x) {
2706 ; GCN-LABEL: v_mul_0x1pn15_f16:
2708 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709 ; GCN-NEXT: v_mul_f16_e32 v0, 0x200, v0
2710 ; GCN-NEXT: s_setpc_b64 s[30:31]
2711 %mul = fmul half %x, 0.000030517578125
; f16 multiply by -256.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xdc00.
2715 define half @v_mul_neg256_f16(half %x) {
2716 ; GCN-LABEL: v_mul_neg256_f16:
2718 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2719 ; GCN-NEXT: v_mul_f16_e32 v0, 0xdc00, v0
2720 ; GCN-NEXT: s_setpc_b64 s[30:31]
2721 %mul = fmul half %x, -256.0
; f16 multiply by -128.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xd800.
2725 define half @v_mul_neg128_f16(half %x) {
2726 ; GCN-LABEL: v_mul_neg128_f16:
2728 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2729 ; GCN-NEXT: v_mul_f16_e32 v0, 0xd800, v0
2730 ; GCN-NEXT: s_setpc_b64 s[30:31]
2731 %mul = fmul half %x, -128.0
; f16 multiply by -64.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xd400.
2735 define half @v_mul_neg64_f16(half %x) {
2736 ; GCN-LABEL: v_mul_neg64_f16:
2738 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2739 ; GCN-NEXT: v_mul_f16_e32 v0, 0xd400, v0
2740 ; GCN-NEXT: s_setpc_b64 s[30:31]
2741 %mul = fmul half %x, -64.0
; f16 multiply by -32.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xd000.
2745 define half @v_mul_neg32_f16(half %x) {
2746 ; GCN-LABEL: v_mul_neg32_f16:
2748 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2749 ; GCN-NEXT: v_mul_f16_e32 v0, 0xd000, v0
2750 ; GCN-NEXT: s_setpc_b64 s[30:31]
2751 %mul = fmul half %x, -32.0
; f16 multiply by -16.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xcc00.
2755 define half @v_mul_neg16_f16(half %x) {
2756 ; GCN-LABEL: v_mul_neg16_f16:
2758 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2759 ; GCN-NEXT: v_mul_f16_e32 v0, 0xcc00, v0
2760 ; GCN-NEXT: s_setpc_b64 s[30:31]
2761 %mul = fmul half %x, -16.0
; f16 multiply by -8.0. The shared checks expect a plain v_mul_f16 with the
; literal 0xc800.
2765 define half @v_mul_neg8_f16(half %x) {
2766 ; GCN-LABEL: v_mul_neg8_f16:
2768 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2769 ; GCN-NEXT: v_mul_f16_e32 v0, 0xc800, v0
2770 ; GCN-NEXT: s_setpc_b64 s[30:31]
2771 %mul = fmul half %x, -8.0
; f16 multiply by -4.0. The shared checks expect a plain v_mul_f16 with the
; constant printed as -4.0 rather than as a hex literal.
2775 define half @v_mul_neg4_f16(half %x) {
2776 ; GCN-LABEL: v_mul_neg4_f16:
2778 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2779 ; GCN-NEXT: v_mul_f16_e32 v0, -4.0, v0
2780 ; GCN-NEXT: s_setpc_b64 s[30:31]
2781 %mul = fmul half %x, -4.0
; f16 multiply by -2.0. The shared checks expect a plain v_mul_f16 with the
; constant printed as -2.0.
2785 define half @v_mul_neg2_f16(half %x) {
2786 ; GCN-LABEL: v_mul_neg2_f16:
2788 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2789 ; GCN-NEXT: v_mul_f16_e32 v0, -2.0, v0
2790 ; GCN-NEXT: s_setpc_b64 s[30:31]
2791 %mul = fmul half %x, -2.0
; f16 multiply by -1.0. The two selectors differ here. The SelectionDAG
; checks expect a v_xor_b32 with 0x8000 (a sign-bit flip), while the
; GlobalISel checks expect v_mul_f16 by -1.0.
2795 define half @v_mul_neg1_f16(half %x) {
2796 ; GFX9-SDAG-LABEL: v_mul_neg1_f16:
2797 ; GFX9-SDAG: ; %bb.0:
2798 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2799 ; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
2800 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2802 ; GFX9-GISEL-LABEL: v_mul_neg1_f16:
2803 ; GFX9-GISEL: ; %bb.0:
2804 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2805 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
2806 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2808 ; GFX10-SDAG-LABEL: v_mul_neg1_f16:
2809 ; GFX10-SDAG: ; %bb.0:
2810 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811 ; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
2812 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2814 ; GFX10-GISEL-LABEL: v_mul_neg1_f16:
2815 ; GFX10-GISEL: ; %bb.0:
2816 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2817 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
2818 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2820 ; GFX11-SDAG-LABEL: v_mul_neg1_f16:
2821 ; GFX11-SDAG: ; %bb.0:
2822 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2823 ; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
2824 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2826 ; GFX11-GISEL-LABEL: v_mul_neg1_f16:
2827 ; GFX11-GISEL: ; %bb.0:
2828 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2829 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
2830 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2831 %mul = fmul half %x, -1.0
; f16 multiply by -0.5. The shared checks expect a plain v_mul_f16 with the
; constant printed as -0.5.
2835 define half @v_mul_neg_half_f16(half %x) {
2836 ; GCN-LABEL: v_mul_neg_half_f16:
2838 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2839 ; GCN-NEXT: v_mul_f16_e32 v0, -0.5, v0
2840 ; GCN-NEXT: s_setpc_b64 s[30:31]
2841 %mul = fmul half %x, -0.5
; f16 multiply by -0.25. The shared checks expect a plain v_mul_f16 with the
; literal 0xb400.
2845 define half @v_mul_neg_quarter_f16(half %x) {
2846 ; GCN-LABEL: v_mul_neg_quarter_f16:
2848 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2849 ; GCN-NEXT: v_mul_f16_e32 v0, 0xb400, v0
2850 ; GCN-NEXT: s_setpc_b64 s[30:31]
2851 %mul = fmul half %x, -0.25
; f16 multiply by 0.25. The shared checks expect a plain v_mul_f16 with the
; literal 0x3400.
2855 define half @v_mul_quarter_f16(half %x) {
2856 ; GCN-LABEL: v_mul_quarter_f16:
2858 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859 ; GCN-NEXT: v_mul_f16_e32 v0, 0x3400, v0
2860 ; GCN-NEXT: s_setpc_b64 s[30:31]
2861 %mul = fmul half %x, 0.25
; f16 multiply by 0.5. The shared checks expect a plain v_mul_f16 with the
; constant printed as 0.5.
2865 define half @v_mul_half_f16(half %x) {
2866 ; GCN-LABEL: v_mul_half_f16:
2868 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2869 ; GCN-NEXT: v_mul_f16_e32 v0, 0.5, v0
2870 ; GCN-NEXT: s_setpc_b64 s[30:31]
2871 %mul = fmul half %x, 0.5
; f16 multiply by 1.0. The shared checks expect no arithmetic instruction at
; all, only the wait and the return, so the multiply is expected to fold away.
2875 define half @v_mul_1_f16(half %x) {
2876 ; GCN-LABEL: v_mul_1_f16:
2878 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2879 ; GCN-NEXT: s_setpc_b64 s[30:31]
2880 %mul = fmul half %x, 1.0
; f16 multiply by 2.0. The two selectors differ here. The SelectionDAG
; checks expect v_add_f16 of the input with itself, while the GlobalISel
; checks expect v_mul_f16 by 2.0.
2884 define half @v_mul_2_f16(half %x) {
2885 ; GFX9-SDAG-LABEL: v_mul_2_f16:
2886 ; GFX9-SDAG: ; %bb.0:
2887 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2888 ; GFX9-SDAG-NEXT: v_add_f16_e32 v0, v0, v0
2889 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
2891 ; GFX9-GISEL-LABEL: v_mul_2_f16:
2892 ; GFX9-GISEL: ; %bb.0:
2893 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2894 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, 2.0, v0
2895 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2897 ; GFX10-SDAG-LABEL: v_mul_2_f16:
2898 ; GFX10-SDAG: ; %bb.0:
2899 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2900 ; GFX10-SDAG-NEXT: v_add_f16_e32 v0, v0, v0
2901 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2903 ; GFX10-GISEL-LABEL: v_mul_2_f16:
2904 ; GFX10-GISEL: ; %bb.0:
2905 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2906 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, 2.0, v0
2907 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2909 ; GFX11-SDAG-LABEL: v_mul_2_f16:
2910 ; GFX11-SDAG: ; %bb.0:
2911 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2912 ; GFX11-SDAG-NEXT: v_add_f16_e32 v0, v0, v0
2913 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2915 ; GFX11-GISEL-LABEL: v_mul_2_f16:
2916 ; GFX11-GISEL: ; %bb.0:
2917 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, 2.0, v0
2919 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
2920 %mul = fmul half %x, 2.0
; f16 multiply by 4.0. The shared checks expect a plain v_mul_f16 with the
; constant printed as 4.0.
2924 define half @v_mul_4_f16(half %x) {
2925 ; GCN-LABEL: v_mul_4_f16:
2927 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2928 ; GCN-NEXT: v_mul_f16_e32 v0, 4.0, v0
2929 ; GCN-NEXT: s_setpc_b64 s[30:31]
2930 %mul = fmul half %x, 4.0
; f16 multiply by 8.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x4800.
2934 define half @v_mul_8_f16(half %x) {
2935 ; GCN-LABEL: v_mul_8_f16:
2937 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2938 ; GCN-NEXT: v_mul_f16_e32 v0, 0x4800, v0
2939 ; GCN-NEXT: s_setpc_b64 s[30:31]
2940 %mul = fmul half %x, 8.0
; f16 multiply by 16.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x4c00.
2944 define half @v_mul_16_f16(half %x) {
2945 ; GCN-LABEL: v_mul_16_f16:
2947 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2948 ; GCN-NEXT: v_mul_f16_e32 v0, 0x4c00, v0
2949 ; GCN-NEXT: s_setpc_b64 s[30:31]
2950 %mul = fmul half %x, 16.0
; f16 multiply by 32.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x5000.
2954 define half @v_mul_32_f16(half %x) {
2955 ; GCN-LABEL: v_mul_32_f16:
2957 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5000, v0
2959 ; GCN-NEXT: s_setpc_b64 s[30:31]
2960 %mul = fmul half %x, 32.0
; f16 multiply by 64.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x5400.
2964 define half @v_mul_64_f16(half %x) {
2965 ; GCN-LABEL: v_mul_64_f16:
2967 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2968 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5400, v0
2969 ; GCN-NEXT: s_setpc_b64 s[30:31]
2970 %mul = fmul half %x, 64.0
; f16 multiply by 128.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x5800.
2974 define half @v_mul_128_f16(half %x) {
2975 ; GCN-LABEL: v_mul_128_f16:
2977 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2978 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5800, v0
2979 ; GCN-NEXT: s_setpc_b64 s[30:31]
2980 %mul = fmul half %x, 128.0
; f16 multiply by 256.0. The shared checks expect a plain v_mul_f16 with the
; literal 0x5c00.
2984 define half @v_mul_256_f16(half %x) {
2985 ; GCN-LABEL: v_mul_256_f16:
2987 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2988 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5c00, v0
2989 ; GCN-NEXT: s_setpc_b64 s[30:31]
2990 %mul = fmul half %x, 256.0
; f16 multiply by 2^15 (32768.0). The shared checks expect a plain v_mul_f16
; with the literal 0x7800.
2995 define half @v_mul_0x1p15_f16(half %x) {
2996 ; GCN-LABEL: v_mul_0x1p15_f16:
2998 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999 ; GCN-NEXT: v_mul_f16_e32 v0, 0x7800, v0
3000 ; GCN-NEXT: s_setpc_b64 s[30:31]
3001 %mul = fmul half %x, 32768.0
; f16 multiply by 2^14 (16384.0). The shared checks expect a plain v_mul_f16
; with the literal 0x7400.
3006 define half @v_mul_0x1p14_f16(half %x) {
3007 ; GCN-LABEL: v_mul_0x1p14_f16:
3009 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3010 ; GCN-NEXT: v_mul_f16_e32 v0, 0x7400, v0
3011 ; GCN-NEXT: s_setpc_b64 s[30:31]
3012 %mul = fmul half %x, 16384.0
3016 ; Check that this doesn't interfere with fma formation
; Contractable f16 pattern x * 32.0 + y. Every run line is expected to form a
; single fused multiply-add with the constant 0x5000. The gfx900 checks
; expect v_fma_f16 with the constant first moved into a register (an SGPR for
; SelectionDAG, a VGPR for GlobalISel); the gfx1030/gfx1100 checks expect
; v_fmamk_f16 with the literal inline.
3017 define half @v_fma_mul_add_32_f16(half %x, half %y) {
3018 ; GFX9-SDAG-LABEL: v_fma_mul_add_32_f16:
3019 ; GFX9-SDAG: ; %bb.0:
3020 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3021 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5000
3022 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v0, s4, v1
3023 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3025 ; GFX9-GISEL-LABEL: v_fma_mul_add_32_f16:
3026 ; GFX9-GISEL: ; %bb.0:
3027 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3028 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5000
3029 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v2, v1
3030 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3032 ; GFX1011-LABEL: v_fma_mul_add_32_f16:
3034 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3035 ; GFX1011-NEXT: v_fmamk_f16 v0, v0, 0x5000, v1
3036 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3037 %mul = fmul contract half %x, 32.0
3038 %fma = fadd contract half %mul, %y
; Contractable f16 pattern x * 32.0 - y. Every run line is expected to form a
; single v_fma_f16 with the constant 0x5000 and a negated addend (-v1). The
; gfx900 checks first move the constant into a register; the
; gfx1030/gfx1100 checks use it as a literal operand.
3042 define half @v_fma_mul_sub_32_f16(half %x, half %y) {
3043 ; GFX9-SDAG-LABEL: v_fma_mul_sub_32_f16:
3044 ; GFX9-SDAG: ; %bb.0:
3045 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3046 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5000
3047 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v0, s4, -v1
3048 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3050 ; GFX9-GISEL-LABEL: v_fma_mul_sub_32_f16:
3051 ; GFX9-GISEL: ; %bb.0:
3052 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3053 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5000
3054 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v2, -v1
3055 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3057 ; GFX1011-LABEL: v_fma_mul_sub_32_f16:
3059 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3060 ; GFX1011-NEXT: v_fma_f16 v0, v0, 0x5000, -v1
3061 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3062 %mul = fmul contract half %x, 32.0
3063 %fma = fsub contract half %mul, %y
; Contractable f16 pattern x * -32.0 + y. Every run line is expected to form
; a single fused multiply-add with the constant 0xd000. The gfx900 checks
; expect v_fma_f16 with the constant first moved into a register; the
; gfx1030/gfx1100 checks expect v_fmamk_f16 with the literal inline.
3067 define half @v_fma_mul_add_neg32_f16(half %x, half %y) {
3068 ; GFX9-SDAG-LABEL: v_fma_mul_add_neg32_f16:
3069 ; GFX9-SDAG: ; %bb.0:
3070 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3071 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xd000
3072 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v0, s4, v1
3073 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3075 ; GFX9-GISEL-LABEL: v_fma_mul_add_neg32_f16:
3076 ; GFX9-GISEL: ; %bb.0:
3077 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3078 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd000
3079 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v2, v1
3080 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3082 ; GFX1011-LABEL: v_fma_mul_add_neg32_f16:
3084 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3085 ; GFX1011-NEXT: v_fmamk_f16 v0, v0, 0xd000, v1
3086 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3087 %mul = fmul contract half %x, -32.0
3088 %fma = fadd contract half %mul, %y
; f16 multiply of fabs(x) by 32.0. Every run line is expected to fold the
; fabs into the |v0| source modifier of a v_mul_f16_e64 with the constant
; 0x5000. The gfx900 checks first move the constant into a register; the
; gfx1030/gfx1100 checks use it as a literal operand.
3092 define half @v_mul_fabs_32_f16(half %x) {
3093 ; GFX9-SDAG-LABEL: v_mul_fabs_32_f16:
3094 ; GFX9-SDAG: ; %bb.0:
3095 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3096 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5000
3097 ; GFX9-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4
3098 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3100 ; GFX9-GISEL-LABEL: v_mul_fabs_32_f16:
3101 ; GFX9-GISEL: ; %bb.0:
3102 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3103 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x5000
3104 ; GFX9-GISEL-NEXT: v_mul_f16_e64 v0, |v0|, v1
3105 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3107 ; GFX1011-LABEL: v_mul_fabs_32_f16:
3109 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3110 ; GFX1011-NEXT: v_mul_f16_e64 v0, 0x5000, |v0|
3111 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3112 %x.fabs = call half @llvm.fabs.f16(half %x)
3113 %mul = fmul half %x.fabs, 32.0
; Contractable f16 pattern fabs(x) * 32.0 + y. Every run line is expected to
; form a single v_fma_f16 that carries the fabs as the |v0| source modifier
; and uses the constant 0x5000. The gfx900 checks first move the constant
; into a register; the gfx1030/gfx1100 checks use it as a literal operand.
3117 define half @v_mul_add_fma_fabs_32_f16(half %x, half %y) {
3118 ; GFX9-SDAG-LABEL: v_mul_add_fma_fabs_32_f16:
3119 ; GFX9-SDAG: ; %bb.0:
3120 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3121 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5000
3122 ; GFX9-SDAG-NEXT: v_fma_f16 v0, |v0|, s4, v1
3123 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3125 ; GFX9-GISEL-LABEL: v_mul_add_fma_fabs_32_f16:
3126 ; GFX9-GISEL: ; %bb.0:
3127 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3128 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5000
3129 ; GFX9-GISEL-NEXT: v_fma_f16 v0, |v0|, v2, v1
3130 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3132 ; GFX1011-LABEL: v_mul_add_fma_fabs_32_f16:
3134 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3135 ; GFX1011-NEXT: v_fma_f16 v0, |v0|, 0x5000, v1
3136 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3137 %x.fabs = call half @llvm.fabs.f16(half %x)
3138 %mul = fmul contract half %x.fabs, 32.0
3139 %fma = fadd contract half %mul, %y
; <2 x half> multiply by a splat of 16.0. Every run line is expected to use
; one packed v_pk_mul_f16. The gfx900 SelectionDAG and gfx1030/gfx1100 checks
; supply the 16-bit constant 0x4c00 with op_sel_hi, while the gfx900
; GlobalISel checks materialize the full 0x4c004c00 in a VGPR.
3143 define <2 x half> @v_mul_16_v2f16(<2 x half> %x) {
3144 ; GFX9-SDAG-LABEL: v_mul_16_v2f16:
3145 ; GFX9-SDAG: ; %bb.0:
3146 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3147 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x4c00
3148 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
3149 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3151 ; GFX9-GISEL-LABEL: v_mul_16_v2f16:
3152 ; GFX9-GISEL: ; %bb.0:
3153 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3154 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x4c004c00
3155 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
3156 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3158 ; GFX1011-LABEL: v_mul_16_v2f16:
3160 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3161 ; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
3162 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3163 %mul = fmul <2 x half> %x, <half 16.0, half 16.0>
; <2 x half> multiply by a splat of -16.0. Every run line is expected to use
; one packed v_pk_mul_f16. The gfx900 SelectionDAG and gfx1030/gfx1100 checks
; supply the 16-bit constant 0xcc00 with op_sel_hi, while the gfx900
; GlobalISel checks materialize the full 0xcc00cc00 in a VGPR.
3167 define <2 x half> @v_mul_neg16_v2f16(<2 x half> %x) {
3168 ; GFX9-SDAG-LABEL: v_mul_neg16_v2f16:
3169 ; GFX9-SDAG: ; %bb.0:
3170 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3171 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xcc00
3172 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
3173 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3175 ; GFX9-GISEL-LABEL: v_mul_neg16_v2f16:
3176 ; GFX9-GISEL: ; %bb.0:
3177 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3178 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xcc00cc00
3179 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
3180 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3182 ; GFX1011-LABEL: v_mul_neg16_v2f16:
3184 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3185 ; GFX1011-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
3186 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3187 %mul = fmul <2 x half> %x, <half -16.0, half -16.0>
; <2 x half> multiply of fabs(x) by a splat of 16.0. In every run line the
; fabs is expected as a separate v_and_b32 with 0x7fff7fff (clearing both
; sign bits) ahead of the packed v_pk_mul_f16, rather than as a source
; modifier on the multiply.
3191 define <2 x half> @v_mul_fabs_16_v2f16(<2 x half> %x) {
3192 ; GFX9-SDAG-LABEL: v_mul_fabs_16_v2f16:
3193 ; GFX9-SDAG: ; %bb.0:
3194 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3195 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
3196 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x4c00
3197 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
3198 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3200 ; GFX9-GISEL-LABEL: v_mul_fabs_16_v2f16:
3201 ; GFX9-GISEL: ; %bb.0:
3202 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3203 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
3204 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x4c004c00
3205 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
3206 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3208 ; GFX1011-LABEL: v_mul_fabs_16_v2f16:
3210 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3211 ; GFX1011-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
3212 ; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
3213 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3214 %x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
3215 %mul = fmul <2 x half> %x.fabs, <half 16.0, half 16.0>
; Contractable <2 x half> pattern x * splat(32.0) + y. Every run line is
; expected to form one packed v_pk_fma_f16. The gfx900 SelectionDAG and
; gfx1030/gfx1100 checks supply the 16-bit constant 0x5000 with op_sel_hi,
; while the gfx900 GlobalISel checks materialize 0x50005000 in a VGPR.
3219 define <2 x half> @v_fma_mul_add_32_v2f16(<2 x half> %x, <2 x half> %y) {
3220 ; GFX9-SDAG-LABEL: v_fma_mul_add_32_v2f16:
3221 ; GFX9-SDAG: ; %bb.0:
3222 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3223 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x5000
3224 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v0, s4, v1 op_sel_hi:[1,0,1]
3225 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3227 ; GFX9-GISEL-LABEL: v_fma_mul_add_32_v2f16:
3228 ; GFX9-GISEL: ; %bb.0:
3229 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3230 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x50005000
3231 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v0, v2, v1
3232 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3234 ; GFX1011-LABEL: v_fma_mul_add_32_v2f16:
3236 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3237 ; GFX1011-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
3238 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3239 %mul = fmul contract <2 x half> %x, <half 32.0, half 32.0>
3240 %fma = fadd contract <2 x half> %mul, %y
; Scalar (inreg) f16 multiply by 32.0 with the contract flag in an amdgpu_ps
; function. The product is bitcast to i16, zero-extended to i32 and passed
; through llvm.amdgcn.readfirstlane. All run lines are expected to use
; v_mul_f16 with the constant 0x5000. Only the gfx1030/gfx1100 checks contain
; an explicit v_and_b32 with 0xffff for the zext.
; %y is not referenced by any of the visible instructions.
3244 define amdgpu_ps i32 @s_mul_32_f16(half inreg %x, half inreg %y) {
3245 ; GFX9-LABEL: s_mul_32_f16:
3247 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5000
3248 ; GFX9-NEXT: v_mul_f16_e32 v0, s0, v0
3249 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
3250 ; GFX9-NEXT: ; return to shader part epilog
3252 ; GFX1011-LABEL: s_mul_32_f16:
3254 ; GFX1011-NEXT: v_mul_f16_e64 v0, 0x5000, s0
3255 ; GFX1011-NEXT: v_and_b32_e32 v0, 0xffff, v0
3256 ; GFX1011-NEXT: v_readfirstlane_b32 s0, v0
3257 ; GFX1011-NEXT: ; return to shader part epilog
3258 %mul = fmul contract half %x, 32.0
3259 %cast = bitcast half %mul to i16
3260 %zext = zext i16 %cast to i32
3261 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %zext)
3265 ; --------------------------------------------------------------------
3266 ; fmul by select of power of 2
3267 ; --------------------------------------------------------------------
; f32 multiply by a select between two power-of-two constants, choosing 64.0
; when %arg == 0 and 1.0 otherwise. The checks expect the select to stay a
; v_cndmask_b32 on the constants (0x42800000 / 1.0) followed by a v_mul_f32.
3269 define float @v_mul_f32_select_64_1(i32 %arg, float %x) {
3270 ; GFX9-LABEL: v_mul_f32_select_64_1:
3272 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3273 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
3274 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3275 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
3276 ; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0
3277 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3279 ; GFX1011-LABEL: v_mul_f32_select_64_1:
3281 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3282 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3283 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
3284 ; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0
3285 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3286 %cond = icmp eq i32 %arg, 0
3287 %select.pow2 = select i1 %cond, float 64.0, float 1.0
3288 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between two power-of-two constants, choosing 1.0
; when %arg == 0 and 64.0 otherwise (the operands of the 64_1 variant
; swapped). The checks expect a v_cndmask_b32 on the constants followed by a
; v_mul_f32.
3292 define float @v_mul_f32_select_1_64(i32 %arg, float %x) {
3293 ; GFX9-LABEL: v_mul_f32_select_1_64:
3295 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3296 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
3297 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3298 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc
3299 ; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0
3300 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3302 ; GFX1011-LABEL: v_mul_f32_select_1_64:
3304 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3305 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3306 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
3307 ; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0
3308 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3309 %cond = icmp eq i32 %arg, 0
3310 %select.pow2 = select i1 %cond, float 1.0, float 64.0
3311 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between two negative power-of-two constants,
; choosing -1.0 when %arg == 0 and -64.0 otherwise. The checks expect a
; v_cndmask_b32 on the constants (0xc2800000 / -1.0) followed by a v_mul_f32.
3315 define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) {
3316 ; GFX9-LABEL: v_mul_f32_select_n1_n64:
3318 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3319 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2800000
3320 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3321 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -1.0, vcc
3322 ; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0
3323 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3325 ; GFX1011-LABEL: v_mul_f32_select_n1_n64:
3327 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3328 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3329 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
3330 ; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0
3331 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3332 %cond = icmp eq i32 %arg, 0
3333 %select.pow2 = select i1 %cond, float -1.0, float -64.0
3334 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between two negative power-of-two constants,
; choosing -64.0 when %arg == 0 and -1.0 otherwise. The checks expect a
; v_cndmask_b32 on the constants (-1.0 / 0xc2800000) followed by a v_mul_f32.
3338 define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) {
3339 ; GFX9-LABEL: v_mul_f32_select_n64_n1:
3341 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3342 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2800000
3343 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3344 ; GFX9-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
3345 ; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0
3346 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3348 ; GFX1011-LABEL: v_mul_f32_select_n64_n1:
3350 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3351 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3352 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
3353 ; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0
3354 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3355 %cond = icmp eq i32 %arg, 0
3356 %select.pow2 = select i1 %cond, float -64.0, float -1.0
3357 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between 128.0 (when %arg == 0) and 64.0. Neither
; constant is printed as an inline float in the checks; both appear as hex
; literals (0x43000000 / 0x42800000). The run lines differ only in which
; constants are first moved into VGPRs and in the cndmask operand order. All
; of them expect a v_cndmask_b32 followed by a v_mul_f32.
3361 define float @v_mul_f32_select_128_64(i32 %arg, float %x) {
3362 ; GFX9-SDAG-LABEL: v_mul_f32_select_128_64:
3363 ; GFX9-SDAG: ; %bb.0:
3364 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3365 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
3366 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000
3367 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3368 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
3369 ; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3370 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3372 ; GFX9-GISEL-LABEL: v_mul_f32_select_128_64:
3373 ; GFX9-GISEL: ; %bb.0:
3374 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3375 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x43000000
3376 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
3377 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3378 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3379 ; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3380 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3382 ; GFX10-SDAG-LABEL: v_mul_f32_select_128_64:
3383 ; GFX10-SDAG: ; %bb.0:
3384 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3385 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x43000000
3386 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3387 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v2, vcc_lo
3388 ; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3389 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3391 ; GFX10-GISEL-LABEL: v_mul_f32_select_128_64:
3392 ; GFX10-GISEL: ; %bb.0:
3393 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3394 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3395 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3396 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo
3397 ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3398 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3400 ; GFX11-SDAG-LABEL: v_mul_f32_select_128_64:
3401 ; GFX11-SDAG: ; %bb.0:
3402 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3403 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x43000000
3404 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3405 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v2, vcc_lo
3406 ; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3407 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3409 ; GFX11-GISEL-LABEL: v_mul_f32_select_128_64:
3410 ; GFX11-GISEL: ; %bb.0:
3411 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3412 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3413 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3414 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo
3415 ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3416 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3417 %cond = icmp eq i32 %arg, 0
3418 %select.pow2 = select i1 %cond, float 128.0, float 64.0
3419 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between -128.0 (when %arg == 0) and -64.0. Both
; constants appear in the checks as hex literals (0xc3000000 / 0xc2800000).
; The run lines differ only in which constants are first moved into VGPRs
; and in the cndmask operand order. All of them expect a v_cndmask_b32
; followed by a v_mul_f32.
3423 define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) {
3424 ; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64:
3425 ; GFX9-SDAG: ; %bb.0:
3426 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3427 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2800000
3428 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc3000000
3429 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3430 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
3431 ; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3432 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3434 ; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64:
3435 ; GFX9-GISEL: ; %bb.0:
3436 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3437 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000
3438 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2800000
3439 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3440 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3441 ; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3442 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3444 ; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64:
3445 ; GFX10-SDAG: ; %bb.0:
3446 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3447 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000
3448 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3449 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc2800000, v2, vcc_lo
3450 ; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3451 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3453 ; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64:
3454 ; GFX10-GISEL: ; %bb.0:
3455 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3456 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
3457 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3458 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
3459 ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3460 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3462 ; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64:
3463 ; GFX11-SDAG: ; %bb.0:
3464 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3465 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000
3466 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3467 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc2800000, v2, vcc_lo
3468 ; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3469 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3471 ; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64:
3472 ; GFX11-GISEL: ; %bb.0:
3473 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3474 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
3475 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3476 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
3477 ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3478 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3479 %cond = icmp eq i32 %arg, 0
3480 %select.pow2 = select i1 %cond, float -128.0, float -64.0
3481 %mul = fmul float %x, %select.pow2
; f32 multiply by a select between -128.0 (when %arg == 0) and -16.0, i.e.
; two negative powers of two whose exponents differ by more than one. Both
; constants appear in the checks as hex literals (0xc3000000 / 0xc1800000).
; All run lines expect a v_cndmask_b32 followed by a v_mul_f32.
3485 define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) {
3486 ; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n16:
3487 ; GFX9-SDAG: ; %bb.0:
3488 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3489 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xc1800000
3490 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc3000000
3491 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3492 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
3493 ; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3494 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3496 ; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n16:
3497 ; GFX9-GISEL: ; %bb.0:
3498 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3499 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000
3500 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1800000
3501 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3502 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3503 ; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3504 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3506 ; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n16:
3507 ; GFX10-SDAG: ; %bb.0:
3508 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3509 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000
3510 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3511 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc1800000, v2, vcc_lo
3512 ; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3513 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3515 ; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n16:
3516 ; GFX10-GISEL: ; %bb.0:
3517 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000
3519 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3520 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
3521 ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3522 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3524 ; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n16:
3525 ; GFX11-SDAG: ; %bb.0:
3526 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3527 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000
3528 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3529 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc1800000, v2, vcc_lo
3530 ; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
3531 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3533 ; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n16:
3534 ; GFX11-GISEL: ; %bb.0:
3535 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3536 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000
3537 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3538 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
3539 ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
3540 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3541 %cond = icmp eq i32 %arg, 0
3542 %select.pow2 = select i1 %cond, float -128.0, float -16.0
3543 %mul = fmul float %x, %select.pow2
; Contractable f32 multiply-add whose multiplier is select(%arg == 0, 64.0, 1.0).
; The checks expect the multiplier to be picked with v_cndmask_b32 and the
; fmul+fadd pair to be emitted as one v_fma_f32. 0x42800000 is the f32 bit
; pattern of 64.0; 1.0 is used directly as an instruction operand.
3547 define float @v_contract_mul_add_f32_select_64_1(i32 %arg, float %x, float %y) {
3548 ; GFX9-LABEL: v_contract_mul_add_f32_select_64_1:
3550 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3551 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
3552 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3553 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
3554 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3555 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3557 ; GFX1011-LABEL: v_contract_mul_add_f32_select_64_1:
3559 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3560 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3561 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
3562 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3563 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3564 %cond = icmp eq i32 %arg, 0
3565 %select.pow2 = select contract i1 %cond, float 64.0, float 1.0
3566 %mul = fmul contract float %x, %select.pow2
3567 %fma = fadd contract float %mul, %y
; Same shape as the 64_1 variant with the select arms swapped: the multiplier
; is select(%arg == 0, 1.0, 64.0). The checks expect v_cndmask_b32_e64 with
; 1.0 as the "true" operand and 64.0 (0x42800000) as the "false" operand,
; followed by a single v_fma_f32.
3571 define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) {
3572 ; GFX9-LABEL: v_contract_mul_add_f32_select_1_64:
3574 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3575 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
3576 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3577 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc
3578 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3579 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3581 ; GFX1011-LABEL: v_contract_mul_add_f32_select_1_64:
3583 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3584 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3585 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
3586 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3587 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3588 %cond = icmp eq i32 %arg, 0
3589 %select.pow2 = select contract i1 %cond, float 1.0, float 64.0
3590 %mul = fmul contract float %x, %select.pow2
3591 %fma = fadd contract float %mul, %y
; Negative-constant variant: the multiplier is select(%arg == 0, -64.0, -1.0).
; The checks expect the sign to stay in the selected constants (0xc2800000 is
; the f32 bit pattern of -64.0, -1.0 is used directly as an operand) and the
; contractable fmul+fadd to be emitted as one v_fma_f32.
3595 define float @v_contract_mul_add_f32_select_n64_n1(i32 %arg, float %x, float %y) {
3596 ; GFX9-LABEL: v_contract_mul_add_f32_select_n64_n1:
3598 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3599 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000
3600 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3601 ; GFX9-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc
3602 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3603 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3605 ; GFX1011-LABEL: v_contract_mul_add_f32_select_n64_n1:
3607 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3608 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3609 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
3610 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3611 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3612 %cond = icmp eq i32 %arg, 0
3613 %select.pow2 = select contract i1 %cond, float -64.0, float -1.0
3614 %mul = fmul contract float %x, %select.pow2
3615 %fma = fadd contract float %mul, %y
; Negative-constant variant with the arms swapped: the multiplier is
; select(%arg == 0, -1.0, -64.0). The checks expect v_cndmask_b32_e64 choosing
; between -64.0 (0xc2800000) and -1.0, followed by a single v_fma_f32.
3619 define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) {
3620 ; GFX9-LABEL: v_contract_mul_add_f32_select_n1_n64:
3622 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3623 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000
3624 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3625 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc
3626 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3627 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3629 ; GFX1011-LABEL: v_contract_mul_add_f32_select_n1_n64:
3631 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3632 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3633 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
3634 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3635 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3636 %cond = icmp eq i32 %arg, 0
3637 %select.pow2 = select contract i1 %cond, float -1.0, float -64.0
3638 %mul = fmul contract float %x, %select.pow2
3639 %fma = fadd contract float %mul, %y
; Contractable f32 multiply-add whose multiplier is select(%arg == 0, 128.0, 64.0).
; Unlike the +/-1.0 variants the select itself carries no contract flag here.
; Both constants are checked as 32-bit literals (0x43000000 is 128.0f,
; 0x42800000 is 64.0f); the instruction selectors differ in which literal is
; moved into a register first, so every run line has its own check block.
; All of them end in v_cndmask_b32 followed by a single v_fma_f32.
3643 define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y) {
3644 ; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_128_64:
3645 ; GFX9-SDAG: ; %bb.0:
3646 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3647 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
3648 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x43000000
3649 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3650 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
3651 ; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
3652 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3654 ; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
3655 ; GFX9-GISEL: ; %bb.0:
3656 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3657 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43000000
3658 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
3659 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3660 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
3661 ; GFX9-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
3662 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3664 ; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_128_64:
3665 ; GFX10-SDAG: ; %bb.0:
3666 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3667 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000
3668 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3669 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v3, vcc_lo
3670 ; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
3671 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3673 ; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
3674 ; GFX10-GISEL: ; %bb.0:
3675 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3676 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
3677 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3678 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo
3679 ; GFX10-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
3680 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3682 ; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_128_64:
3683 ; GFX11-SDAG: ; %bb.0:
3684 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3685 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000
3686 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3687 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v3, vcc_lo
3688 ; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
3689 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3691 ; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
3692 ; GFX11-GISEL: ; %bb.0:
3693 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3694 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
3695 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3696 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo
3697 ; GFX11-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
3698 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3699 %cond = icmp eq i32 %arg, 0
3700 %select.pow2 = select i1 %cond, float 128.0, float 64.0
3701 %mul = fmul contract float %x, %select.pow2
3702 %fma = fadd contract float %mul, %y
; Contractable f32 multiply-add whose multiplier is select(%arg == 0, 128.0, 4.0).
; 4.0 is checked directly as a v_cndmask_b32 operand, while 128.0 appears as
; the literal 0x43000000. The fmul+fadd pair is expected as one v_fma_f32.
3706 define float @v_contract_mul_add_f32_select_128_4(i32 %arg, float %x, float %y) {
3707 ; GFX9-LABEL: v_contract_mul_add_f32_select_128_4:
3709 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000
3711 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3712 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc
3713 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3714 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3716 ; GFX1011-LABEL: v_contract_mul_add_f32_select_128_4:
3718 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3719 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3720 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo
3721 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3722 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3723 %cond = icmp eq i32 %arg, 0
3724 %select.pow2 = select i1 %cond, float 128.0, float 4.0
3725 %mul = fmul contract float %x, %select.pow2
3726 %fma = fadd contract float %mul, %y
; Contractable f32 multiply-add whose multiplier is select(%arg == 0, 2.0, 4.0).
; Both constants are checked directly as v_cndmask_b32_e64 operands, so no
; v_mov is expected on any target; the result feeds a single v_fma_f32.
3730 define float @v_contract_mul_add_f32_select_2_4(i32 %arg, float %x, float %y) {
3731 ; GFX9-LABEL: v_contract_mul_add_f32_select_2_4:
3733 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3734 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3735 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 4.0, 2.0, vcc
3736 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3737 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3739 ; GFX1011-LABEL: v_contract_mul_add_f32_select_2_4:
3741 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3742 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3743 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4.0, 2.0, vcc_lo
3744 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3745 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3746 %cond = icmp eq i32 %arg, 0
3747 %select.pow2 = select i1 %cond, float 2.0, float 4.0
3748 %mul = fmul contract float %x, %select.pow2
3749 %fma = fadd contract float %mul, %y
; Arms-swapped counterpart of the 128_4 test: the multiplier is
; select(%arg == 0, 4.0, 128.0). The checks expect 4.0 as the "true" operand
; of v_cndmask_b32_e64 and 128.0 (0x43000000) as the "false" operand, followed
; by a single v_fma_f32.
3753 define float @v_contract_mul_add_f32_select_4_128(i32 %arg, float %x, float %y) {
3754 ; GFX9-LABEL: v_contract_mul_add_f32_select_4_128:
3756 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000
3758 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3759 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc
3760 ; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
3761 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3763 ; GFX1011-LABEL: v_contract_mul_add_f32_select_4_128:
3765 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3766 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3767 ; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo
3768 ; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
3769 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
3770 %cond = icmp eq i32 %arg, 0
3771 %select.pow2 = select i1 %cond, float 4.0, float 128.0
3772 %mul = fmul contract float %x, %select.pow2
3773 %fma = fadd contract float %mul, %y
; f64 multiply by select(%arg == 0, 64.0, 1.0), no fast-math flags.
; The two doubles differ only in their high 32 bits (0x40500000 for 64.0,
; 0x3ff00000 for 1.0), so the checks expect a v_cndmask_b32 on the high half
; (v4) paired with a zero low half (v3), then v_mul_f64 with v[3:4].
3777 define double @v_mul_f64_select_64_1(i32 %arg, double %x) {
3778 ; GFX9-SDAG-LABEL: v_mul_f64_select_64_1:
3779 ; GFX9-SDAG: ; %bb.0:
3780 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3781 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3ff00000
3782 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40500000
3783 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3784 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
3785 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
3786 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3787 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3789 ; GFX9-GISEL-LABEL: v_mul_f64_select_64_1:
3790 ; GFX9-GISEL: ; %bb.0:
3791 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3792 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
3793 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3ff00000
3794 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3795 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
3796 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
3797 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3798 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3800 ; GFX10-SDAG-LABEL: v_mul_f64_select_64_1:
3801 ; GFX10-SDAG: ; %bb.0:
3802 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3803 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x40500000
3804 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3805 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
3806 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x3ff00000, v4, vcc_lo
3807 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3808 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3810 ; GFX10-GISEL-LABEL: v_mul_f64_select_64_1:
3811 ; GFX10-GISEL: ; %bb.0:
3812 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000
3814 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3815 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
3816 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo
3817 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3818 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3820 ; GFX11-SDAG-LABEL: v_mul_f64_select_64_1:
3821 ; GFX11-SDAG: ; %bb.0:
3822 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3823 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0
3824 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3825 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x3ff00000, v4, vcc_lo
3826 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3827 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3829 ; GFX11-GISEL-LABEL: v_mul_f64_select_64_1:
3830 ; GFX11-GISEL: ; %bb.0:
3831 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3832 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x3ff00000 :: v_dual_mov_b32 v3, 0
3833 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3834 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo
3835 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3836 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3837 %cond = icmp eq i32 %arg, 0
3838 %select.pow2 = select i1 %cond, double 64.0, double 1.0
3839 %mul = fmul double %x, %select.pow2
; f64 multiply by select(%arg == 0, 1.0, 64.0): the arms-swapped counterpart
; of the 64_1 test. The checks expect the high dword to be selected between
; 0x3ff00000 (1.0, taken when %arg == 0) and 0x40500000 (64.0), a zero low
; dword in v3, and a v_mul_f64 consuming v[3:4].
3843 define double @v_mul_f64_select_1_64(i32 %arg, double %x) {
3844 ; GFX9-SDAG-LABEL: v_mul_f64_select_1_64:
3845 ; GFX9-SDAG: ; %bb.0:
3846 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3847 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x40500000
3848 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3ff00000
3849 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3850 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
3851 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
3852 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3853 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3855 ; GFX9-GISEL-LABEL: v_mul_f64_select_1_64:
3856 ; GFX9-GISEL: ; %bb.0:
3857 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3858 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000
3859 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000
3860 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3861 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
3862 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
3863 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3864 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3866 ; GFX10-SDAG-LABEL: v_mul_f64_select_1_64:
3867 ; GFX10-SDAG: ; %bb.0:
3868 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3869 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x3ff00000
3870 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3871 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
3872 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo
3873 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3874 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3876 ; GFX10-GISEL-LABEL: v_mul_f64_select_1_64:
3877 ; GFX10-GISEL: ; %bb.0:
3878 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
3880 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3881 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
3882 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo
3883 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3884 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3886 ; GFX11-SDAG-LABEL: v_mul_f64_select_1_64:
3887 ; GFX11-SDAG: ; %bb.0:
3888 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x3ff00000 :: v_dual_mov_b32 v3, 0
3890 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3891 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo
3892 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3893 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3895 ; GFX11-GISEL-LABEL: v_mul_f64_select_1_64:
3896 ; GFX11-GISEL: ; %bb.0:
3897 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3898 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0
3899 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3900 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo
3901 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3902 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3903 %cond = icmp eq i32 %arg, 0
3904 %select.pow2 = select i1 %cond, double 1.0, double 64.0
3905 %mul = fmul double %x, %select.pow2
; f64 multiply by select(%arg == 0, -1.0, -64.0), no fast-math flags.
; The checks expect the sign bit to stay in the selected high dwords
; (0xbff00000 for -1.0, 0xc0500000 for -64.0), a zero low dword in v3, and a
; plain v_mul_f64 with v[3:4] as the multiplier.
3909 define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) {
3910 ; GFX9-SDAG-LABEL: v_mul_f64_select_n1_n64:
3911 ; GFX9-SDAG: ; %bb.0:
3912 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3913 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0500000
3914 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
3915 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3916 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
3917 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
3918 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3919 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3921 ; GFX9-GISEL-LABEL: v_mul_f64_select_n1_n64:
3922 ; GFX9-GISEL: ; %bb.0:
3923 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3924 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbff00000
3925 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000
3926 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3927 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
3928 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
3929 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3930 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3932 ; GFX10-SDAG-LABEL: v_mul_f64_select_n1_n64:
3933 ; GFX10-SDAG: ; %bb.0:
3934 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3935 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
3936 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3937 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
3938 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo
3939 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3940 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3942 ; GFX10-GISEL-LABEL: v_mul_f64_select_n1_n64:
3943 ; GFX10-GISEL: ; %bb.0:
3944 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3945 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000
3946 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3947 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
3948 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo
3949 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3950 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
3952 ; GFX11-SDAG-LABEL: v_mul_f64_select_n1_n64:
3953 ; GFX11-SDAG: ; %bb.0:
3954 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3955 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xbff00000 :: v_dual_mov_b32 v3, 0
3956 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3957 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo
3958 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3959 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
3961 ; GFX11-GISEL-LABEL: v_mul_f64_select_n1_n64:
3962 ; GFX11-GISEL: ; %bb.0:
3963 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0
3965 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3966 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo
3967 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3968 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
3969 %cond = icmp eq i32 %arg, 0
3970 %select.pow2 = select i1 %cond, double -1.0, double -64.0
3971 %mul = fmul double %x, %select.pow2
; f64 multiply by select(%arg == 0, 128.0, 64.0), no fast-math flags.
; The checks expect the high dword to be selected between 0x40600000 (128.0)
; and 0x40500000 (64.0) with v_cndmask_b32, a zero low dword in v3, and a
; v_mul_f64 consuming v[3:4].
3975 define double @v_mul_f64_select_128_64(i32 %arg, double %x) {
3976 ; GFX9-SDAG-LABEL: v_mul_f64_select_128_64:
3977 ; GFX9-SDAG: ; %bb.0:
3978 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3979 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x40500000
3980 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40600000
3981 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3982 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
3983 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
3984 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3985 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3987 ; GFX9-GISEL-LABEL: v_mul_f64_select_128_64:
3988 ; GFX9-GISEL: ; %bb.0:
3989 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3990 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40600000
3991 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000
3992 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3993 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
3994 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
3995 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
3996 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3998 ; GFX10-SDAG-LABEL: v_mul_f64_select_128_64:
3999 ; GFX10-SDAG: ; %bb.0:
4000 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4001 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x40600000
4002 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4003 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
4004 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo
4005 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4006 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4008 ; GFX10-GISEL-LABEL: v_mul_f64_select_128_64:
4009 ; GFX10-GISEL: ; %bb.0:
4010 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4011 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
4012 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4013 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
4014 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo
4015 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4016 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4018 ; GFX11-SDAG-LABEL: v_mul_f64_select_128_64:
4019 ; GFX11-SDAG: ; %bb.0:
4020 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4021 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x40600000 :: v_dual_mov_b32 v3, 0
4022 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4023 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo
4024 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4025 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4027 ; GFX11-GISEL-LABEL: v_mul_f64_select_128_64:
4028 ; GFX11-GISEL: ; %bb.0:
4029 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4030 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0
4031 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4032 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo
4033 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4034 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4035 %cond = icmp eq i32 %arg, 0
4036 %select.pow2 = select i1 %cond, double 128.0, double 64.0
4037 %mul = fmul double %x, %select.pow2
; f64 multiply by select(%arg == 0, -128.0, -64.0), no fast-math flags.
; Negative counterpart of the 128_64 test: the checks expect the high dword
; to be selected between 0xc0600000 (-128.0) and 0xc0500000 (-64.0), a zero
; low dword in v3, and a v_mul_f64 consuming v[3:4].
4041 define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) {
4042 ; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n64:
4043 ; GFX9-SDAG: ; %bb.0:
4044 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4045 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0500000
4046 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000
4047 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4048 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
4049 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
4050 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4051 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4053 ; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n64:
4054 ; GFX9-GISEL: ; %bb.0:
4055 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4056 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000
4057 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000
4058 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4059 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
4060 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
4061 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4062 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4064 ; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n64:
4065 ; GFX10-SDAG: ; %bb.0:
4066 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4067 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000
4068 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4069 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
4070 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo
4071 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4072 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4074 ; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n64:
4075 ; GFX10-GISEL: ; %bb.0:
4076 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4077 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000
4078 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4079 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
4080 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
4081 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4082 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4084 ; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n64:
4085 ; GFX11-SDAG: ; %bb.0:
4086 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4087 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xc0600000 :: v_dual_mov_b32 v3, 0
4088 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4089 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo
4090 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4091 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4093 ; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n64:
4094 ; GFX11-GISEL: ; %bb.0:
4095 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4096 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0
4097 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4098 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
4099 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4100 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4101 %cond = icmp eq i32 %arg, 0
4102 %select.pow2 = select i1 %cond, double -128.0, double -64.0
4103 %mul = fmul double %x, %select.pow2
; f64 multiply by select(%arg == 0, -128.0, -16.0), no fast-math flags.
; The two powers of two are three exponent steps apart; the checks expect the
; high dword to be selected between 0xc0600000 (-128.0) and 0xc0300000
; (-16.0), a zero low dword in v3, and a v_mul_f64 consuming v[3:4].
4107 define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) {
4108 ; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n16:
4109 ; GFX9-SDAG: ; %bb.0:
4110 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4111 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0300000
4112 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000
4113 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4114 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
4115 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0
4116 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4117 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4119 ; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n16:
4120 ; GFX9-GISEL: ; %bb.0:
4121 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4122 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000
4123 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0300000
4124 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4125 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
4126 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
4127 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4128 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4130 ; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n16:
4131 ; GFX10-SDAG: ; %bb.0:
4132 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4133 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000
4134 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4135 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0
4136 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0300000, v4, vcc_lo
4137 ; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4138 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4140 ; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n16:
4141 ; GFX10-GISEL: ; %bb.0:
4142 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4143 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0300000
4144 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4145 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
4146 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
4147 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4148 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4150 ; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n16:
4151 ; GFX11-SDAG: ; %bb.0:
4152 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4153 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xc0600000 :: v_dual_mov_b32 v3, 0
4154 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4155 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0300000, v4, vcc_lo
4156 ; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4157 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4159 ; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n16:
4160 ; GFX11-GISEL: ; %bb.0:
4161 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4162 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0300000 :: v_dual_mov_b32 v3, 0
4163 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4164 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
4165 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
4166 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4167 %cond = icmp eq i32 %arg, 0
4168 %select.pow2 = select i1 %cond, double -128.0, double -16.0
4169 %mul = fmul double %x, %select.pow2
; Contractable f64 multiply-add whose multiplier is select(%arg == 0, 64.0, 1.0).
; The checks expect the high dword of the multiplier to be selected between
; 0x40500000 (64.0) and 0x3ff00000 (1.0) with v_cndmask_b32, a zero low dword
; in v5, and the fmul+fadd pair emitted as one v_fma_f64 using v[5:6] as the
; multiplier and v[3:4] (%y) as the addend.
4173 define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y) {
4174 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_64_1:
4175 ; GFX9-SDAG: ; %bb.0:
4176 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4177 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3ff00000
4178 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x40500000
4179 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4180 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4181 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4182 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4183 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4185 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
4186 ; GFX9-GISEL: ; %bb.0:
4187 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4188 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
4189 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x3ff00000
4190 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4191 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4192 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4193 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4194 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4196 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_64_1:
4197 ; GFX10-SDAG: ; %bb.0:
4198 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4199 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0x40500000
4200 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4201 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4202 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x3ff00000, v6, vcc_lo
4203 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4204 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4206 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
4207 ; GFX10-GISEL: ; %bb.0:
4208 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4209 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
4210 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4211 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4212 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo
4213 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4214 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4216 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_64_1:
4217 ; GFX11-SDAG: ; %bb.0:
4218 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4219 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0
4220 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4221 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x3ff00000, v6, vcc_lo
4222 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4223 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4225 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
4226 ; GFX11-GISEL: ; %bb.0:
4227 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4228 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x3ff00000 :: v_dual_mov_b32 v5, 0
4229 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4230 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo
4231 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4232 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4233 %cond = icmp eq i32 %arg, 0
4234 %select.pow2 = select contract i1 %cond, double 64.0, double 1.0
4235 %mul = fmul contract double %x, %select.pow2
4236 %fma = fadd contract double %mul, %y
; Contract fmul of %x by a select between 1.0 (taken when %arg == 0) and 64.0,
; followed by a contract fadd of %y. The select itself also carries the
; contract flag. In the expected assembly for every run line only the high
; dword of the selected double is chosen with v_cndmask (the low dword is a
; move of 0 into v5) and the multiply-add is a single v_fma_f64; no ldexp
; instruction appears.
4240 define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y) {
4241 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_1_64:
4242 ; GFX9-SDAG: ; %bb.0:
4243 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4244 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40500000
4245 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3ff00000
4246 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4247 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4248 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4249 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4250 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4252 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
4253 ; GFX9-GISEL: ; %bb.0:
4254 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4255 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
4256 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000
4257 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4258 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4259 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4260 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4261 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4263 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_1_64:
4264 ; GFX10-SDAG: ; %bb.0:
4265 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0x3ff00000
4267 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4268 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4269 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40500000, v6, vcc_lo
4270 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4271 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4273 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
4274 ; GFX10-GISEL: ; %bb.0:
4275 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4276 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
4277 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4278 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4279 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo
4280 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4281 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4283 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_1_64:
4284 ; GFX11-SDAG: ; %bb.0:
4285 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4286 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0x3ff00000 :: v_dual_mov_b32 v5, 0
4287 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4288 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40500000, v6, vcc_lo
4289 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4290 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4292 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
4293 ; GFX11-GISEL: ; %bb.0:
4294 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4295 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0
4296 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4297 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo
4298 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4299 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4300 %cond = icmp eq i32 %arg, 0
4301 %select.pow2 = select contract i1 %cond, double 1.0, double 64.0
4302 %mul = fmul contract double %x, %select.pow2
4303 %fma = fadd contract double %mul, %y
; Negative-constant variant: contract fmul of %x by a select between -64.0
; (taken when %arg == 0) and -1.0, followed by a contract fadd of %y. The
; select also carries the contract flag. The expected assembly selects the
; high dword (0xc0500000 or 0xbff00000) with v_cndmask, zeroes the low dword,
; and emits one v_fma_f64; no ldexp instruction appears.
4307 define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double %y) {
4308 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1:
4309 ; GFX9-SDAG: ; %bb.0:
4310 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4311 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0xbff00000
4312 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0xc0500000
4313 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4314 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4315 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4316 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4317 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4319 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
4320 ; GFX9-GISEL: ; %bb.0:
4321 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4322 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000
4323 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xbff00000
4324 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4325 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4326 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4327 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4328 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4330 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1:
4331 ; GFX10-SDAG: ; %bb.0:
4332 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4333 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0xc0500000
4334 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4335 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4336 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0xbff00000, v6, vcc_lo
4337 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4338 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4340 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
4341 ; GFX10-GISEL: ; %bb.0:
4342 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000
4344 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4345 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4346 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo
4347 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4348 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4350 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1:
4351 ; GFX11-SDAG: ; %bb.0:
4352 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4353 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0xc0500000 :: v_dual_mov_b32 v5, 0
4354 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4355 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0xbff00000, v6, vcc_lo
4356 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4357 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4359 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
4360 ; GFX11-GISEL: ; %bb.0:
4361 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4362 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xbff00000 :: v_dual_mov_b32 v5, 0
4363 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4364 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo
4365 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4366 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4367 %cond = icmp eq i32 %arg, 0
4368 %select.pow2 = select contract i1 %cond, double -64.0, double -1.0
4369 %mul = fmul contract double %x, %select.pow2
4370 %fma = fadd contract double %mul, %y
; Same as the n64_n1 case with the select arms swapped: contract fmul of %x by
; a select between -1.0 (taken when %arg == 0) and -64.0, followed by a
; contract fadd of %y. The select also carries the contract flag. The expected
; assembly selects the high dword with v_cndmask, zeroes the low dword, and
; emits one v_fma_f64; no ldexp instruction appears.
4374 define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double %y) {
4375 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64:
4376 ; GFX9-SDAG: ; %bb.0:
4377 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4378 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0xc0500000
4379 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0xbff00000
4380 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4381 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4382 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4383 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4384 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4386 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
4387 ; GFX9-GISEL: ; %bb.0:
4388 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4389 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000
4390 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xc0500000
4391 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4392 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4393 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4394 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4395 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4397 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64:
4398 ; GFX10-SDAG: ; %bb.0:
4399 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4400 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0xbff00000
4401 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4402 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4403 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0xc0500000, v6, vcc_lo
4404 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4405 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4407 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
4408 ; GFX10-GISEL: ; %bb.0:
4409 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4410 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000
4411 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4412 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4413 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo
4414 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4415 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4417 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64:
4418 ; GFX11-SDAG: ; %bb.0:
4419 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4420 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0xbff00000 :: v_dual_mov_b32 v5, 0
4421 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4422 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0xc0500000, v6, vcc_lo
4423 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4424 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4426 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
4427 ; GFX11-GISEL: ; %bb.0:
4428 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4429 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xc0500000 :: v_dual_mov_b32 v5, 0
4430 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4431 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo
4432 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4433 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4434 %cond = icmp eq i32 %arg, 0
4435 %select.pow2 = select contract i1 %cond, double -1.0, double -64.0
4436 %mul = fmul contract double %x, %select.pow2
4437 %fma = fadd contract double %mul, %y
; Contract fmul of %x by a select between 128.0 (taken when %arg == 0) and
; 64.0, followed by a contract fadd of %y. Here the select has no fast-math
; flags; only the fmul and fadd are marked contract. Neither select arm is
; 1.0. The expected assembly selects the high dword (0x40600000 or 0x40500000)
; with v_cndmask, zeroes the low dword, and emits one v_fma_f64; no ldexp
; instruction appears.
4441 define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double %y) {
4442 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_128_64:
4443 ; GFX9-SDAG: ; %bb.0:
4444 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4445 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40500000
4446 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x40600000
4447 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4448 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4449 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4450 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4451 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4453 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
4454 ; GFX9-GISEL: ; %bb.0:
4455 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4456 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
4457 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000
4458 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4459 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4460 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4461 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4462 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4464 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_64:
4465 ; GFX10-SDAG: ; %bb.0:
4466 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4467 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0x40600000
4468 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4469 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4470 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40500000, v6, vcc_lo
4471 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4472 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4474 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
4475 ; GFX10-GISEL: ; %bb.0:
4476 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4477 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
4478 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4479 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4480 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
4481 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4482 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4484 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_64:
4485 ; GFX11-SDAG: ; %bb.0:
4486 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4487 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0x40600000 :: v_dual_mov_b32 v5, 0
4488 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4489 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40500000, v6, vcc_lo
4490 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4491 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4493 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
4494 ; GFX11-GISEL: ; %bb.0:
4495 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4496 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0
4497 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4498 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
4499 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4500 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4501 %cond = icmp eq i32 %arg, 0
4502 %select.pow2 = select i1 %cond, double 128.0, double 64.0
4503 %mul = fmul contract double %x, %select.pow2
4504 %fma = fadd contract double %mul, %y
; Contract fmul of %x by a select between 128.0 (taken when %arg == 0) and
; 4.0, followed by a contract fadd of %y. The select has no fast-math flags;
; only the fmul and fadd are marked contract. The expected assembly selects
; the high dword (0x40600000 or 0x40100000) with v_cndmask, zeroes the low
; dword, and emits one v_fma_f64; no ldexp instruction appears.
4508 define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double %y) {
4509 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_128_4:
4510 ; GFX9-SDAG: ; %bb.0:
4511 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4512 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40100000
4513 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x40600000
4514 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4515 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4516 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4517 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4518 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4520 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
4521 ; GFX9-GISEL: ; %bb.0:
4522 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4523 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
4524 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40100000
4525 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4526 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4527 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4528 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4529 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4531 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_4:
4532 ; GFX10-SDAG: ; %bb.0:
4533 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4534 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0x40600000
4535 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4536 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4537 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40100000, v6, vcc_lo
4538 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4539 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4541 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
4542 ; GFX10-GISEL: ; %bb.0:
4543 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4544 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
4545 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4546 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4547 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
4548 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4549 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4551 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_4:
4552 ; GFX11-SDAG: ; %bb.0:
4553 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4554 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0x40600000 :: v_dual_mov_b32 v5, 0
4555 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4556 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40100000, v6, vcc_lo
4557 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4558 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4560 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
4561 ; GFX11-GISEL: ; %bb.0:
4562 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4563 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40100000 :: v_dual_mov_b32 v5, 0
4564 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4565 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
4566 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4567 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4568 %cond = icmp eq i32 %arg, 0
4569 %select.pow2 = select i1 %cond, double 128.0, double 4.0
4570 %mul = fmul contract double %x, %select.pow2
4571 %fma = fadd contract double %mul, %y
; Contract fmul of %x by a select between 2.0 (taken when %arg == 0) and 4.0,
; followed by a contract fadd of %y. The select has no fast-math flags; only
; the fmul and fadd are marked contract. In the expected assembly 2.0 appears
; directly as an operand of v_cndmask_b32_e64, so only the 4.0 high dword
; (0x40100000) is written as a hex literal. The gfx1030 and gfx1100 outputs
; are identical for both instruction selectors and share one set of
; assertions. The multiply-add is one v_fma_f64; no ldexp instruction appears.
4575 define double @v_contract_mul_add_f64_select_2_4(i32 %arg, double %x, double %y) {
4576 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_2_4:
4577 ; GFX9-SDAG: ; %bb.0:
4578 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4579 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40100000
4580 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4581 ; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v6, v5, 2.0, vcc
4582 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4583 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4584 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4586 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_2_4:
4587 ; GFX9-GISEL: ; %bb.0:
4588 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4589 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
4590 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4591 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4592 ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 2.0, vcc
4593 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4594 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4596 ; GFX1011-LABEL: v_contract_mul_add_f64_select_2_4:
4598 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4599 ; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4600 ; GFX1011-NEXT: v_mov_b32_e32 v5, 0
4601 ; GFX1011-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo
4602 ; GFX1011-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4603 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
4604 %cond = icmp eq i32 %arg, 0
4605 %select.pow2 = select i1 %cond, double 2.0, double 4.0
4606 %mul = fmul contract double %x, %select.pow2
4607 %fma = fadd contract double %mul, %y
; Same as the 128_4 case with the select arms swapped: contract fmul of %x by
; a select between 4.0 (taken when %arg == 0) and 128.0, followed by a
; contract fadd of %y. The select has no fast-math flags. The expected
; assembly selects the high dword (0x40100000 or 0x40600000) with v_cndmask,
; zeroes the low dword, and emits one v_fma_f64; no ldexp instruction appears.
4611 define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double %y) {
4612 ; GFX9-SDAG-LABEL: v_contract_mul_add_f64_select_4_128:
4613 ; GFX9-SDAG: ; %bb.0:
4614 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4615 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40600000
4616 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x40100000
4617 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4618 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc
4619 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0
4620 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4621 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4623 ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
4624 ; GFX9-GISEL: ; %bb.0:
4625 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4626 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
4627 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40600000
4628 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4629 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
4630 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
4631 ; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4632 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4634 ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_4_128:
4635 ; GFX10-SDAG: ; %bb.0:
4636 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4637 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v6, 0x40100000
4638 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4639 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
4640 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40600000, v6, vcc_lo
4641 ; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4642 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4644 ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
4645 ; GFX10-GISEL: ; %bb.0:
4646 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
4648 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4649 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
4650 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo
4651 ; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4652 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4654 ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_4_128:
4655 ; GFX11-SDAG: ; %bb.0:
4656 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4657 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0x40100000 :: v_dual_mov_b32 v5, 0
4658 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4659 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v6, 0x40600000, v6, vcc_lo
4660 ; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4661 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4663 ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
4664 ; GFX11-GISEL: ; %bb.0:
4665 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4666 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40600000 :: v_dual_mov_b32 v5, 0
4667 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4668 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo
4669 ; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
4670 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4671 %cond = icmp eq i32 %arg, 0
4672 %select.pow2 = select i1 %cond, double 4.0, double 128.0
4673 %mul = fmul contract double %x, %select.pow2
4674 %fma = fadd contract double %mul, %y
; Half-precision case: fmul (no fast-math flags) of %x by a select between
; 64.0 (taken when %arg == 0) and 1.0. The expected assembly picks between the
; two half bit patterns (0x5400 and 0x3c00) with v_cndmask and multiplies with
; v_mul_f16; no ldexp instruction appears.
4678 define half @v_mul_f16_select_64_1(i32 %arg, half %x) {
4679 ; GFX9-SDAG-LABEL: v_mul_f16_select_64_1:
4680 ; GFX9-SDAG: ; %bb.0:
4681 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4682 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00
4683 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
4684 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4685 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4686 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4687 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4689 ; GFX9-GISEL-LABEL: v_mul_f16_select_64_1:
4690 ; GFX9-GISEL: ; %bb.0:
4691 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4692 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
4693 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
4694 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4695 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
4696 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4697 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4699 ; GFX10-SDAG-LABEL: v_mul_f16_select_64_1:
4700 ; GFX10-SDAG: ; %bb.0:
4701 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4702 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400
4703 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4704 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
4705 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4706 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4708 ; GFX10-GISEL-LABEL: v_mul_f16_select_64_1:
4709 ; GFX10-GISEL: ; %bb.0:
4710 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4711 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
4712 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4713 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo
4714 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4715 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4717 ; GFX11-SDAG-LABEL: v_mul_f16_select_64_1:
4718 ; GFX11-SDAG: ; %bb.0:
4719 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4720 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400
4721 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4722 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
4723 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4724 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4726 ; GFX11-GISEL-LABEL: v_mul_f16_select_64_1:
4727 ; GFX11-GISEL: ; %bb.0:
4728 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4729 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
4730 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4731 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo
4732 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4733 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4734 %cond = icmp eq i32 %arg, 0
4735 %select.pow2 = select i1 %cond, half 64.0, half 1.0
4736 %mul = fmul half %x, %select.pow2
; Same as the f16 64_1 case with the select arms swapped: fmul (no fast-math
; flags) of %x by a select between 1.0 (taken when %arg == 0) and 64.0. The
; expected assembly picks between the half bit patterns 0x3c00 and 0x5400 with
; v_cndmask and multiplies with v_mul_f16; no ldexp instruction appears.
4740 define half @v_mul_f16_select_1_64(i32 %arg, half %x) {
4741 ; GFX9-SDAG-LABEL: v_mul_f16_select_1_64:
4742 ; GFX9-SDAG: ; %bb.0:
4743 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4744 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400
4745 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
4746 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4747 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4748 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4749 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4751 ; GFX9-GISEL-LABEL: v_mul_f16_select_1_64:
4752 ; GFX9-GISEL: ; %bb.0:
4753 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4754 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
4755 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
4756 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4757 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
4758 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4759 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4761 ; GFX10-SDAG-LABEL: v_mul_f16_select_1_64:
4762 ; GFX10-SDAG: ; %bb.0:
4763 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4764 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00
4765 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4766 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo
4767 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4768 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4770 ; GFX10-GISEL-LABEL: v_mul_f16_select_1_64:
4771 ; GFX10-GISEL: ; %bb.0:
4772 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4773 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
4774 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4775 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo
4776 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4777 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4779 ; GFX11-SDAG-LABEL: v_mul_f16_select_1_64:
4780 ; GFX11-SDAG: ; %bb.0:
4781 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4782 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00
4783 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4784 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo
4785 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4786 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4788 ; GFX11-GISEL-LABEL: v_mul_f16_select_1_64:
4789 ; GFX11-GISEL: ; %bb.0:
4790 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4791 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
4792 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4793 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo
4794 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4795 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4796 %cond = icmp eq i32 %arg, 0
4797 %select.pow2 = select i1 %cond, half 1.0, half 64.0
4798 %mul = fmul half %x, %select.pow2
; Negative-constant f16 case: fmul (no fast-math flags) of %x by a select
; between -1.0 (taken when %arg == 0) and -64.0. The expected assembly picks
; between the half bit patterns 0xbc00 and 0xd400 with v_cndmask and
; multiplies with v_mul_f16; no ldexp instruction appears.
4802 define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) {
4803 ; GFX9-SDAG-LABEL: v_mul_f16_select_n1_n64:
4804 ; GFX9-SDAG: ; %bb.0:
4805 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4806 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xd400
4807 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
4808 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4809 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4810 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4811 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4813 ; GFX9-GISEL-LABEL: v_mul_f16_select_n1_n64:
4814 ; GFX9-GISEL: ; %bb.0:
4815 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4816 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xbc00
4817 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
4818 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4819 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
4820 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4821 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4823 ; GFX10-SDAG-LABEL: v_mul_f16_select_n1_n64:
4824 ; GFX10-SDAG: ; %bb.0:
4825 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4826 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xbc00
4827 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4828 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo
4829 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4830 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4832 ; GFX10-GISEL-LABEL: v_mul_f16_select_n1_n64:
4833 ; GFX10-GISEL: ; %bb.0:
4834 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4835 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
4836 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4837 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo
4838 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4839 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4841 ; GFX11-SDAG-LABEL: v_mul_f16_select_n1_n64:
4842 ; GFX11-SDAG: ; %bb.0:
4843 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4844 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xbc00
4845 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4846 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo
4847 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4848 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4850 ; GFX11-GISEL-LABEL: v_mul_f16_select_n1_n64:
4851 ; GFX11-GISEL: ; %bb.0:
4852 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4853 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
4854 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4855 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo
4856 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4857 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4858 %cond = icmp eq i32 %arg, 0
4859 %select.pow2 = select i1 %cond, half -1.0, half -64.0
4860 %mul = fmul half %x, %select.pow2
; f16 case where neither select arm is 1.0: fmul (no fast-math flags) of %x by
; a select between 128.0 (taken when %arg == 0) and 64.0. The expected
; assembly picks between the half bit patterns 0x5800 and 0x5400 with
; v_cndmask and multiplies with v_mul_f16; no ldexp instruction appears.
4864 define half @v_mul_f16_select_128_64(i32 %arg, half %x) {
4865 ; GFX9-SDAG-LABEL: v_mul_f16_select_128_64:
4866 ; GFX9-SDAG: ; %bb.0:
4867 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4868 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400
4869 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
4870 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4871 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4872 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4873 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4875 ; GFX9-GISEL-LABEL: v_mul_f16_select_128_64:
4876 ; GFX9-GISEL: ; %bb.0:
4877 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4878 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5800
4879 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
4880 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4881 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
4882 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4883 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4885 ; GFX10-SDAG-LABEL: v_mul_f16_select_128_64:
4886 ; GFX10-SDAG: ; %bb.0:
4887 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4888 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x5800
4889 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4890 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo
4891 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4892 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4894 ; GFX10-GISEL-LABEL: v_mul_f16_select_128_64:
4895 ; GFX10-GISEL: ; %bb.0:
4896 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4897 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
4898 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4899 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo
4900 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4901 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4903 ; GFX11-SDAG-LABEL: v_mul_f16_select_128_64:
4904 ; GFX11-SDAG: ; %bb.0:
4905 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4906 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x5800
4907 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4908 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo
4909 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4910 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4912 ; GFX11-GISEL-LABEL: v_mul_f16_select_128_64:
4913 ; GFX11-GISEL: ; %bb.0:
4914 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4915 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
4916 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4917 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo
4918 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4919 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4920 %cond = icmp eq i32 %arg, 0
4921 %select.pow2 = select i1 %cond, half 128.0, half 64.0
4922 %mul = fmul half %x, %select.pow2
; Negated form of the f16 128_64 case: fmul (no fast-math flags) of %x by a
; select between -128.0 (taken when %arg == 0) and -64.0. The expected
; assembly picks between the half bit patterns 0xd800 and 0xd400 with
; v_cndmask and multiplies with v_mul_f16; no ldexp instruction appears.
4926 define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) {
4927 ; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n64:
4928 ; GFX9-SDAG: ; %bb.0:
4929 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4930 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xd400
4931 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
4932 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4933 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4934 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4935 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4937 ; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n64:
4938 ; GFX9-GISEL: ; %bb.0:
4939 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4940 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800
4941 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
4942 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4943 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
4944 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4945 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
4947 ; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n64:
4948 ; GFX10-SDAG: ; %bb.0:
4949 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4950 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800
4951 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4952 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo
4953 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4954 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4956 ; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n64:
4957 ; GFX10-GISEL: ; %bb.0:
4958 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4959 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
4960 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4961 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
4962 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4963 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4965 ; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n64:
4966 ; GFX11-SDAG: ; %bb.0:
4967 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4968 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800
4969 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4970 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo
4971 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4972 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
4974 ; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n64:
4975 ; GFX11-GISEL: ; %bb.0:
4976 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4977 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
4978 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4979 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
4980 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
4981 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
4982 %cond = icmp eq i32 %arg, 0
4983 %select.pow2 = select i1 %cond, half -128.0, half -64.0
4984 %mul = fmul half %x, %select.pow2
; Scalar f16 multiply by a select between two negative powers of two whose
; exponents differ by more than one: -128.0 (f16 bits 0xd800) when %arg == 0,
; otherwise -16.0 (f16 bits 0xcc00). The autogenerated checks below record
; that every run line selects the constant with v_cndmask_b32 and multiplies
; with v_mul_f16.
4988 define half @v_mul_f16_select_n128_n16(i32 %arg, half %x) {
4989 ; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n16:
4990 ; GFX9-SDAG: ; %bb.0:
4991 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xcc00
4993 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
4994 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4995 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
4996 ; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
4997 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
4999 ; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n16:
5000 ; GFX9-GISEL: ; %bb.0:
5001 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5002 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800
5003 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
5004 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5005 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
5006 ; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
5007 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5009 ; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n16:
5010 ; GFX10-SDAG: ; %bb.0:
5011 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5012 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800
5013 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5014 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v2, vcc_lo
5015 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
5016 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5018 ; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n16:
5019 ; GFX10-GISEL: ; %bb.0:
5020 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5021 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00
5022 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5023 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
5024 ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
5025 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5027 ; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n16:
5028 ; GFX11-SDAG: ; %bb.0:
5029 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5030 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800
5031 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5032 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v2, vcc_lo
5033 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0
5034 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5036 ; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n16:
5037 ; GFX11-GISEL: ; %bb.0:
5038 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5039 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00
5040 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5041 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
5042 ; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
5043 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5044 %cond = icmp eq i32 %arg, 0
5045 %select.pow2 = select i1 %cond, half -128.0, half -16.0
5046 %mul = fmul half %x, %select.pow2
; Contractable f16 multiply-add where the multiplier is a select between
; 64.0 (f16 bits 0x5400) when %arg == 0 and 1.0 (f16 bits 0x3c00) otherwise.
; The select, fmul and fadd all carry the 'contract' flag. The autogenerated
; checks below record that every run line fuses the fmul/fadd pair into a
; single v_fma_f16 fed by the v_cndmask_b32-selected constant.
5050 define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) {
5051 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_64_1:
5052 ; GFX9-SDAG: ; %bb.0:
5053 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5054 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5055 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
5056 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5057 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5058 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5059 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5061 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
5062 ; GFX9-GISEL: ; %bb.0:
5063 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5064 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5065 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
5066 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5067 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5068 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5069 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5071 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_64_1:
5072 ; GFX10-SDAG: ; %bb.0:
5073 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5074 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5075 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5076 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
5077 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5078 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5080 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
5081 ; GFX10-GISEL: ; %bb.0:
5082 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5083 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5084 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5085 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
5086 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5087 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5089 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_64_1:
5090 ; GFX11-SDAG: ; %bb.0:
5091 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5092 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5093 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5094 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
5095 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5096 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5098 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
5099 ; GFX11-GISEL: ; %bb.0:
5100 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5101 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5102 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5103 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
5104 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5105 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5106 %cond = icmp eq i32 %arg, 0
5107 %select.pow2 = select contract i1 %cond, half 64.0, half 1.0
5108 %mul = fmul contract half %x, %select.pow2
5109 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add with the select operands in the opposite
; order from v_contract_mul_add_f16_select_64_1: the multiplier is 1.0
; (f16 bits 0x3c00) when %arg == 0 and 64.0 (f16 bits 0x5400) otherwise.
; The select, fmul and fadd all carry the 'contract' flag. The autogenerated
; checks below record a v_cndmask_b32 select feeding a single v_fma_f16.
5113 define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) {
5114 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_1_64:
5115 ; GFX9-SDAG: ; %bb.0:
5116 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5117 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5118 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3c00
5119 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5120 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5121 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5122 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5124 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
5125 ; GFX9-GISEL: ; %bb.0:
5126 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5127 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5128 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
5129 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5130 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5131 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5132 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5134 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_1_64:
5135 ; GFX10-SDAG: ; %bb.0:
5136 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5137 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5138 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5139 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5140 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5141 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5143 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
5144 ; GFX10-GISEL: ; %bb.0:
5145 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5146 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5147 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5148 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
5149 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5150 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5152 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_1_64:
5153 ; GFX11-SDAG: ; %bb.0:
5154 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5155 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5156 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5157 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5158 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5159 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5161 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
5162 ; GFX11-GISEL: ; %bb.0:
5163 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5164 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5165 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5166 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
5167 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5168 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5169 %cond = icmp eq i32 %arg, 0
5170 %select.pow2 = select contract i1 %cond, half 1.0, half 64.0
5171 %mul = fmul contract half %x, %select.pow2
5172 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add where the multiplier is a select between
; two negative constants: -64.0 (f16 bits 0xd400) when %arg == 0 and -1.0
; (f16 bits 0xbc00) otherwise. The select, fmul and fadd all carry the
; 'contract' flag. The autogenerated checks below record a v_cndmask_b32
; select feeding a single v_fma_f16 on every run line.
5176 define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) {
5177 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1:
5178 ; GFX9-SDAG: ; %bb.0:
5179 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5180 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
5181 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xd400
5182 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5183 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5184 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5185 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5187 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
5188 ; GFX9-GISEL: ; %bb.0:
5189 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5190 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5191 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
5192 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5193 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5194 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5195 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5197 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1:
5198 ; GFX10-SDAG: ; %bb.0:
5199 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5200 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xd400
5201 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5202 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
5203 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5204 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5206 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
5207 ; GFX10-GISEL: ; %bb.0:
5208 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5209 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
5210 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5211 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo
5212 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5213 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5215 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1:
5216 ; GFX11-SDAG: ; %bb.0:
5217 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5218 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xd400
5219 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5220 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
5221 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5222 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5224 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
5225 ; GFX11-GISEL: ; %bb.0:
5226 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5227 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
5228 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5229 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo
5230 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5231 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5232 %cond = icmp eq i32 %arg, 0
5233 %select.pow2 = select contract i1 %cond, half -64.0, half -1.0
5234 %mul = fmul contract half %x, %select.pow2
5235 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add with the negative select operands swapped
; relative to v_contract_mul_add_f16_select_n64_n1: the multiplier is -1.0
; (f16 bits 0xbc00) when %arg == 0 and -64.0 (f16 bits 0xd400) otherwise.
; The select, fmul and fadd all carry the 'contract' flag. The autogenerated
; checks below record a v_cndmask_b32 select feeding a single v_fma_f16.
5239 define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) {
5240 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64:
5241 ; GFX9-SDAG: ; %bb.0:
5242 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5243 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd400
5244 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbc00
5245 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5246 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5247 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5248 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5250 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
5251 ; GFX9-GISEL: ; %bb.0:
5252 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5253 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
5254 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
5255 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5256 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5257 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5258 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5260 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64:
5261 ; GFX10-SDAG: ; %bb.0:
5262 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5263 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
5264 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5265 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5266 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5267 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5269 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
5270 ; GFX10-GISEL: ; %bb.0:
5271 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5272 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5273 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5274 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
5275 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5276 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5278 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64:
5279 ; GFX11-SDAG: ; %bb.0:
5280 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5281 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
5282 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5283 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5284 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5285 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5287 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
5288 ; GFX11-GISEL: ; %bb.0:
5289 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5290 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5291 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5292 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
5293 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5294 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5295 %cond = icmp eq i32 %arg, 0
5296 %select.pow2 = select contract i1 %cond, half -1.0, half -64.0
5297 %mul = fmul contract half %x, %select.pow2
5298 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add where the multiplier is a select between
; adjacent powers of two: 128.0 (f16 bits 0x5800) when %arg == 0 and 64.0
; (f16 bits 0x5400) otherwise. Only the fmul and fadd carry 'contract' here;
; the select has no fast-math flag. The autogenerated checks below record a
; v_cndmask_b32 select feeding a single v_fma_f16 on every run line.
5302 define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) {
5303 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_128_64:
5304 ; GFX9-SDAG: ; %bb.0:
5305 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5306 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5307 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
5308 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5309 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5310 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5311 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5313 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
5314 ; GFX9-GISEL: ; %bb.0:
5315 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5316 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
5317 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
5318 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5319 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5320 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5321 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5323 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_64:
5324 ; GFX10-SDAG: ; %bb.0:
5325 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5326 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5327 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5328 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5329 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5330 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5332 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
5333 ; GFX10-GISEL: ; %bb.0:
5334 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5335 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5336 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5337 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5338 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5339 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5341 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_64:
5342 ; GFX11-SDAG: ; %bb.0:
5343 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5344 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5345 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5346 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5347 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5348 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5350 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
5351 ; GFX11-GISEL: ; %bb.0:
5352 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5353 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5354 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5355 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5356 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5357 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5358 %cond = icmp eq i32 %arg, 0
5359 %select.pow2 = select i1 %cond, half 128.0, half 64.0
5360 %mul = fmul contract half %x, %select.pow2
5361 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add where the multiplier is a select between
; two widely separated powers of two: 128.0 (f16 bits 0x5800) when
; %arg == 0 and 4.0 (f16 bits 0x4400) otherwise. Only the fmul and fadd carry
; 'contract'; the select has no fast-math flag. The autogenerated checks
; below record a v_cndmask_b32 select feeding a single v_fma_f16.
5365 define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) {
5366 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_128_4:
5367 ; GFX9-SDAG: ; %bb.0:
5368 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5369 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4400
5370 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
5371 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5372 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5373 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5374 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5376 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
5377 ; GFX9-GISEL: ; %bb.0:
5378 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5379 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
5380 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
5381 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5382 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5383 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5384 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5386 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_4:
5387 ; GFX10-SDAG: ; %bb.0:
5388 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5389 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5390 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5391 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v3, vcc_lo
5392 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5393 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5395 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
5396 ; GFX10-GISEL: ; %bb.0:
5397 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5398 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
5399 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5400 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5401 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5402 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5404 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_4:
5405 ; GFX11-SDAG: ; %bb.0:
5406 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5407 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5408 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5409 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v3, vcc_lo
5410 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5411 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5413 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
5414 ; GFX11-GISEL: ; %bb.0:
5415 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5416 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
5417 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5418 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5419 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5420 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5421 %cond = icmp eq i32 %arg, 0
5422 %select.pow2 = select i1 %cond, half 128.0, half 4.0
5423 %mul = fmul contract half %x, %select.pow2
5424 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add where the multiplier is a select between
; two small powers of two: 2.0 (f16 bits 0x4000) when %arg == 0 and 4.0
; (f16 bits 0x4400) otherwise. Only the fmul and fadd carry 'contract'; the
; select has no fast-math flag. The autogenerated checks below record that
; both values are still materialized as 16-bit literals, selected with
; v_cndmask_b32, and consumed by a single v_fma_f16.
5428 define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) {
5429 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_2_4:
5430 ; GFX9-SDAG: ; %bb.0:
5431 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5432 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4400
5433 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000
5434 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5435 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5436 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5437 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5439 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
5440 ; GFX9-GISEL: ; %bb.0:
5441 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5442 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4000
5443 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
5444 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5445 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5446 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5447 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5449 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_2_4:
5450 ; GFX10-SDAG: ; %bb.0:
5451 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5452 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000
5453 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5454 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v3, vcc_lo
5455 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5456 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5458 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
5459 ; GFX10-GISEL: ; %bb.0:
5460 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5461 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
5462 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5463 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo
5464 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5465 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5467 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_2_4:
5468 ; GFX11-SDAG: ; %bb.0:
5469 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5470 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000
5471 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5472 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v3, vcc_lo
5473 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5474 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5476 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
5477 ; GFX11-GISEL: ; %bb.0:
5478 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5479 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
5480 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5481 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo
5482 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5483 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5484 %cond = icmp eq i32 %arg, 0
5485 %select.pow2 = select i1 %cond, half 2.0, half 4.0
5486 %mul = fmul contract half %x, %select.pow2
5487 %fma = fadd contract half %mul, %y
; Contractable f16 multiply-add with the select operands swapped relative
; to v_contract_mul_add_f16_select_128_4: the multiplier is 4.0 (f16 bits
; 0x4400) when %arg == 0 and 128.0 (f16 bits 0x5800) otherwise. Only the
; fmul and fadd carry 'contract'; the select has no fast-math flag. The
; autogenerated checks below record a v_cndmask_b32 select feeding a single
; v_fma_f16 on every run line.
5491 define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) {
5492 ; GFX9-SDAG-LABEL: v_contract_mul_add_f16_select_4_128:
5493 ; GFX9-SDAG: ; %bb.0:
5494 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5495 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5496 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400
5497 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5498 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5499 ; GFX9-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5500 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5502 ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
5503 ; GFX9-GISEL: ; %bb.0:
5504 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5505 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
5506 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
5507 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5508 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5509 ; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5510 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5512 ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_4_128:
5513 ; GFX10-SDAG: ; %bb.0:
5514 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5515 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4400
5516 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5517 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5800, v3, vcc_lo
5518 ; GFX10-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5519 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5521 ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
5522 ; GFX10-GISEL: ; %bb.0:
5523 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5524 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
5525 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5526 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo
5527 ; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5528 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5530 ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_4_128:
5531 ; GFX11-SDAG: ; %bb.0:
5532 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5533 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x4400
5534 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5535 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5800, v3, vcc_lo
5536 ; GFX11-SDAG-NEXT: v_fma_f16 v0, v1, v0, v2
5537 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5539 ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
5540 ; GFX11-GISEL: ; %bb.0:
5541 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5542 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
5543 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5544 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo
5545 ; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
5546 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5547 %cond = icmp eq i32 %arg, 0
5548 %select.pow2 = select i1 %cond, half 4.0, half 128.0
5549 %mul = fmul contract half %x, %select.pow2
5550 %fma = fadd contract half %mul, %y
; Packed <2 x half> multiply by a per-lane select between splat 64.0
; (f16 bits 0x5400, chosen where the matching %arg lane == 0) and splat 1.0
; (f16 bits 0x3c00). The autogenerated checks below record the current
; lowering: one v_cmp_eq_u32 / v_cndmask_b32 pair per lane, the two 16-bit
; results packed into one register (v_pack_b32_f16 on the -global-isel=0
; runs, v_and_b32 + v_lshl_or_b32 on the -global-isel=1 runs), then a single
; v_pk_mul_f16.
5554 define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) {
5555 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_64_1:
5556 ; GFX9-SDAG: ; %bb.0:
5557 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5558 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5559 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
5560 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5561 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5562 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5563 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5564 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5565 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5566 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5568 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_64_1:
5569 ; GFX9-GISEL: ; %bb.0:
5570 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5571 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5572 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
5573 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5574 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5575 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5576 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5577 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5578 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5579 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5580 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5582 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_64_1:
5583 ; GFX10-SDAG: ; %bb.0:
5584 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5585 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5586 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5587 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
5588 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5589 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
5590 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5591 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5592 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5594 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_64_1:
5595 ; GFX10-GISEL: ; %bb.0:
5596 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5597 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5598 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5599 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
5600 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5601 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5602 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo
5603 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5604 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5605 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5607 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_64_1:
5608 ; GFX11-SDAG: ; %bb.0:
5609 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5610 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5611 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5612 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
5613 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5614 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
5615 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5616 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5617 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5619 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_64_1:
5620 ; GFX11-GISEL: ; %bb.0:
5621 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5622 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5623 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5624 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
5625 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5626 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5627 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo
5628 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5629 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5630 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
5631 %cond = icmp eq <2 x i32> %arg, zeroinitializer
5632 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 64.0, half 64.0>, <2 x half> <half 1.0, half 1.0>
5633 %mul = fmul <2 x half> %x, %select.pow2
; Packed-half fmul of %x by a per-lane select of 1.0 (where the matching lane
; of %arg is zero) or 64.0 (otherwise); both factors are powers of two.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0x3c00 (1.0) and 0x5400 (64.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the product is formed with v_pk_mul_f16.
5637 define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) {
5638 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_1_64:
5639 ; GFX9-SDAG: ; %bb.0:
5640 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5641 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5642 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3c00
5643 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5644 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5645 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5646 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5647 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5648 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5649 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5651 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_1_64:
5652 ; GFX9-GISEL: ; %bb.0:
5653 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5654 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
5655 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
5656 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5657 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5658 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5659 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5660 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5661 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5662 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5663 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5665 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_1_64:
5666 ; GFX10-SDAG: ; %bb.0:
5667 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5668 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5669 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5670 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v3, vcc_lo
5671 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5672 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5673 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5674 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5675 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5677 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_1_64:
5678 ; GFX10-GISEL: ; %bb.0:
5679 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5680 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5681 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5682 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
5683 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5684 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5685 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo
5686 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5687 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5688 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5690 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_1_64:
5691 ; GFX11-SDAG: ; %bb.0:
5692 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5693 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00
5694 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5695 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v3, vcc_lo
5696 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5697 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5698 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5699 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5700 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5702 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_1_64:
5703 ; GFX11-GISEL: ; %bb.0:
5704 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5705 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5706 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5707 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
5708 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5709 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5710 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo
5711 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5712 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5713 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (1.0); all other lanes take the second (64.0).
5714 %cond = icmp eq <2 x i32> %arg, zeroinitializer
5715 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 1.0, half 1.0>, <2 x half> <half 64.0, half 64.0>
5716 %mul = fmul <2 x half> %x, %select.pow2
; Packed-half fmul of %x by a per-lane select of -1.0 (where the matching lane
; of %arg is zero) or -64.0 (otherwise); negative power-of-two factors.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0xbc00 (-1.0) and 0xd400 (-64.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the product is formed with v_pk_mul_f16.
5720 define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) {
5721 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_n1_n64:
5722 ; GFX9-SDAG: ; %bb.0:
5723 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd400
5725 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbc00
5726 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5727 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5728 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5729 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5730 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5731 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5732 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5734 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
5735 ; GFX9-GISEL: ; %bb.0:
5736 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5737 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
5738 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
5739 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5740 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5741 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5742 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5743 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5744 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5745 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5746 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5748 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n1_n64:
5749 ; GFX10-SDAG: ; %bb.0:
5750 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5751 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
5752 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5753 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v3, vcc_lo
5754 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5755 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5756 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5757 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5758 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5760 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
5761 ; GFX10-GISEL: ; %bb.0:
5762 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5763 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5764 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5765 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
5766 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5767 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5768 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo
5769 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5770 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5771 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5773 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n1_n64:
5774 ; GFX11-SDAG: ; %bb.0:
5775 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5776 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00
5777 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5778 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v3, vcc_lo
5779 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5780 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5781 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5782 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5783 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5785 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
5786 ; GFX11-GISEL: ; %bb.0:
5787 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5788 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5789 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5790 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
5791 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5792 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5793 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo
5794 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5795 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5796 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (-1.0); all other lanes take the second (-64.0).
5797 %cond = icmp eq <2 x i32> %arg, zeroinitializer
5798 %select.pow2 = select <2 x i1> %cond, <2 x half> <half -1.0, half -1.0>, <2 x half> <half -64.0, half -64.0>
5799 %mul = fmul <2 x half> %x, %select.pow2
; Packed-half fmul of %x by a per-lane select of 128.0 (where the matching
; lane of %arg is zero) or 64.0 (otherwise); neither factor is 1.0.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0x5800 (128.0) and 0x5400 (64.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the product is formed with v_pk_mul_f16.
5803 define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) {
5804 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_128_64:
5805 ; GFX9-SDAG: ; %bb.0:
5806 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5807 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400
5808 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
5809 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5810 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5811 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5812 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5813 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5814 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5815 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5817 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_128_64:
5818 ; GFX9-GISEL: ; %bb.0:
5819 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5820 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
5821 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
5822 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5823 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5824 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5825 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5826 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5827 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5828 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5829 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5831 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_128_64:
5832 ; GFX10-SDAG: ; %bb.0:
5833 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5834 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5835 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5836 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v3, vcc_lo
5837 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5838 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5839 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5840 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5841 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5843 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_128_64:
5844 ; GFX10-GISEL: ; %bb.0:
5845 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5846 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5847 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5848 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5849 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5850 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5851 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo
5852 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5853 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5854 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5856 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_128_64:
5857 ; GFX11-SDAG: ; %bb.0:
5858 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5859 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800
5860 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5861 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v3, vcc_lo
5862 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5863 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v3, vcc_lo
5864 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5865 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5866 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5868 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_128_64:
5869 ; GFX11-GISEL: ; %bb.0:
5870 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5871 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
5872 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5873 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
5874 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5875 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5876 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo
5877 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5878 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5879 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (128.0); all other lanes take the second (64.0).
5880 %cond = icmp eq <2 x i32> %arg, zeroinitializer
5881 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 64.0, half 64.0>
5882 %mul = fmul <2 x half> %x, %select.pow2
; Packed-half fmul of %x by a per-lane select of -128.0 (where the matching
; lane of %arg is zero) or -64.0 (otherwise); the negated counterpart of the
; 128.0 / 64.0 select.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0xd800 (-128.0) and 0xd400 (-64.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the product is formed with v_pk_mul_f16.
5886 define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) {
5887 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_n128_n64:
5888 ; GFX9-SDAG: ; %bb.0:
5889 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5890 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd400
5891 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xd800
5892 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5893 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5894 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5895 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5896 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5897 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5898 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5900 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
5901 ; GFX9-GISEL: ; %bb.0:
5902 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5903 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800
5904 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
5905 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5906 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5907 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5908 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5909 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5910 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5911 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5912 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5914 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n64:
5915 ; GFX10-SDAG: ; %bb.0:
5916 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5917 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
5918 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5919 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v3, vcc_lo
5920 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5921 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5922 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5923 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5924 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5926 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
5927 ; GFX10-GISEL: ; %bb.0:
5928 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5929 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5930 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5931 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
5932 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5933 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5934 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
5935 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5936 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5937 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5939 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n64:
5940 ; GFX11-SDAG: ; %bb.0:
5941 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5942 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
5943 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5944 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v3, vcc_lo
5945 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5946 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v3, vcc_lo
5947 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5948 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5949 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
5951 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
5952 ; GFX11-GISEL: ; %bb.0:
5953 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5954 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
5955 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5956 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
5957 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5958 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5959 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
5960 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5961 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5962 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (-128.0); all other lanes take the second (-64.0).
5963 %cond = icmp eq <2 x i32> %arg, zeroinitializer
5964 %select.pow2 = select <2 x i1> %cond, <2 x half> <half -128.0, half -128.0>, <2 x half> <half -64.0, half -64.0>
5965 %mul = fmul <2 x half> %x, %select.pow2
; Packed-half fmul of %x by a per-lane select of -128.0 (where the matching
; lane of %arg is zero) or -16.0 (otherwise); the two factors are three
; powers of two apart rather than adjacent.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0xd800 (-128.0) and 0xcc00 (-16.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the product is formed with v_pk_mul_f16.
5969 define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) {
5970 ; GFX9-SDAG-LABEL: v_mul_v2f16_select_n128_n16:
5971 ; GFX9-SDAG: ; %bb.0:
5972 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5973 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xcc00
5974 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xd800
5975 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5976 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
5977 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5978 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
5979 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
5980 ; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
5981 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5983 ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
5984 ; GFX9-GISEL: ; %bb.0:
5985 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5986 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800
5987 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xcc00
5988 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
5989 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
5990 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5991 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
5992 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5993 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
5994 ; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
5995 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5997 ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n16:
5998 ; GFX10-SDAG: ; %bb.0:
5999 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6000 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
6001 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6002 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xcc00, v3, vcc_lo
6003 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6004 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v3, vcc_lo
6005 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6006 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
6007 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6009 ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
6010 ; GFX10-GISEL: ; %bb.0:
6011 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6012 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
6013 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6014 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
6015 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6016 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6017 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
6018 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6019 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
6020 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6022 ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n16:
6023 ; GFX11-SDAG: ; %bb.0:
6024 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6025 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800
6026 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6027 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xcc00, v3, vcc_lo
6028 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6029 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v3, vcc_lo
6030 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6031 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v2, v0
6032 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6034 ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
6035 ; GFX11-GISEL: ; %bb.0:
6036 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6037 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
6038 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6039 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
6040 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6041 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6042 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
6043 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6044 ; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
6045 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (-128.0); all other lanes take the second (-16.0).
6046 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6047 %select.pow2 = select <2 x i1> %cond, <2 x half> <half -128.0, half -128.0>, <2 x half> <half -16.0, half -16.0>
6048 %mul = fmul <2 x half> %x, %select.pow2
; Contractable multiply-add on packed halves. %x is multiplied by a per-lane
; select of 64.0 (where the matching lane of %arg is zero) or 1.0 (otherwise),
; then %y is added; both the fmul and the fadd carry the 'contract' flag.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0x5400 (64.0) and 0x3c00 (1.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the multiply and add appear as a single v_pk_fma_f16.
6052 define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6053 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1:
6054 ; GFX9-SDAG: ; %bb.0:
6055 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6056 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3c00
6057 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5400
6058 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6059 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6060 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6061 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6062 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6063 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6064 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6066 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
6067 ; GFX9-GISEL: ; %bb.0:
6068 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6069 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
6070 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3c00
6071 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6072 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6073 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6074 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6075 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6076 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6077 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6078 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6080 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1:
6081 ; GFX10-SDAG: ; %bb.0:
6082 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6083 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
6084 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6085 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
6086 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6087 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v4, vcc_lo
6088 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6089 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6090 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6092 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
6093 ; GFX10-GISEL: ; %bb.0:
6094 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6095 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
6096 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6097 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo
6098 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6099 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6100 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo
6101 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6102 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6103 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6105 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1:
6106 ; GFX11-SDAG: ; %bb.0:
6107 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6108 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
6109 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6110 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
6111 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6112 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v4, vcc_lo
6113 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6114 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6115 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6117 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
6118 ; GFX11-GISEL: ; %bb.0:
6119 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6120 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
6121 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6122 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo
6123 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6124 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6125 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo
6126 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6127 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6128 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (64.0); all other lanes take the second (1.0). The 'contract' flag on both
; the fmul and the fadd permits fusing them.
6129 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6130 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 64.0, half 64.0>, <2 x half> <half 1.0, half 1.0>
6131 %mul = fmul contract <2 x half> %x, %select.pow2
6132 %fma = fadd contract <2 x half> %mul, %y
; Contractable multiply-add on packed halves. %x is multiplied by a per-lane
; select of 1.0 (where the matching lane of %arg is zero) or 64.0 (otherwise),
; then %y is added; both the fmul and the fadd carry the 'contract' flag.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0x3c00 (1.0) and 0x5400 (64.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the multiply and add appear as a single v_pk_fma_f16.
6136 define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6137 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64:
6138 ; GFX9-SDAG: ; %bb.0:
6139 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6140 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
6141 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00
6142 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6143 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6144 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6145 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6146 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6147 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6148 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6150 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
6151 ; GFX9-GISEL: ; %bb.0:
6152 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6153 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
6154 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400
6155 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6156 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6157 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6158 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6159 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6160 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6161 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6162 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6164 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64:
6165 ; GFX10-SDAG: ; %bb.0:
6166 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6167 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x3c00
6168 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6169 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v4, vcc_lo
6170 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6171 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v4, vcc_lo
6172 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6173 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6174 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6176 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
6177 ; GFX10-GISEL: ; %bb.0:
6178 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6179 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
6180 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6181 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo
6182 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6183 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6184 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo
6185 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6186 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6187 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6189 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64:
6190 ; GFX11-SDAG: ; %bb.0:
6191 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6192 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x3c00
6193 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6194 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v4, vcc_lo
6195 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6196 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v4, vcc_lo
6197 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6198 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6199 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6201 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
6202 ; GFX11-GISEL: ; %bb.0:
6203 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6204 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
6205 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6206 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo
6207 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6208 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6209 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo
6210 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6211 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6212 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (1.0); all other lanes take the second (64.0). The 'contract' flag on both
; the fmul and the fadd permits fusing them.
6213 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6214 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 1.0, half 1.0>, <2 x half> <half 64.0, half 64.0>
6215 %mul = fmul contract <2 x half> %x, %select.pow2
6216 %fma = fadd contract <2 x half> %mul, %y
; Contractable multiply-add on packed halves. %x is multiplied by a per-lane
; select of -64.0 (where the matching lane of %arg is zero) or -1.0
; (otherwise), then %y is added; both the fmul and the fadd carry the
; 'contract' flag.
; In the expected output of all six run configurations the factor is chosen
; between the f16 bit patterns 0xd400 (-64.0) and 0xbc00 (-1.0) with
; v_cndmask_b32, the two halves are repacked (v_pack_b32_f16 in the
; -global-isel=0 runs, v_and_b32 + v_lshl_or_b32 in the -global-isel=1 runs),
; and the multiply and add appear as a single v_pk_fma_f16.
6220 define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6221 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6222 ; GFX9-SDAG: ; %bb.0:
6223 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6224 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbc00
6225 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0xd400
6226 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6227 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6228 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6229 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6230 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6231 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6232 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6234 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6235 ; GFX9-GISEL: ; %bb.0:
6236 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6237 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
6238 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbc00
6239 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6240 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6241 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6242 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6243 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6244 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6245 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6246 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6248 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6249 ; GFX10-SDAG: ; %bb.0:
6250 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6251 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xd400
6252 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6253 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
6254 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6255 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v4, vcc_lo
6256 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6257 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6258 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6260 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6261 ; GFX10-GISEL: ; %bb.0:
6262 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6263 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
6264 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6265 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo
6266 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6267 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6268 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo
6269 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6270 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6271 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6273 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6274 ; GFX11-SDAG: ; %bb.0:
6275 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6276 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0xd400
6277 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6278 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
6279 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6280 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v4, vcc_lo
6281 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6282 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6283 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6285 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
6286 ; GFX11-GISEL: ; %bb.0:
6287 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6288 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
6289 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6290 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo
6291 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6292 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6293 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo
6294 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6295 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6296 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Lanes of %arg equal to zero take the first select operand
; (-64.0); all other lanes take the second (-1.0). The 'contract' flag on both
; the fmul and the fadd permits fusing them.
6297 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6298 %select.pow2 = select <2 x i1> %cond, <2 x half> <half -64.0, half -64.0>, <2 x half> <half -1.0, half -1.0>
6299 %mul = fmul contract <2 x half> %x, %select.pow2
6300 %fma = fadd contract <2 x half> %mul, %y
; Packed-half multiply-add where the multiplier is chosen per lane by a select.
; A lane of %x is scaled by -1.0 when the matching lane of %arg equals zero and
; by -64.0 otherwise, then %y is added. Both the fmul and the fadd carry the
; `contract` fast-math flag. The assertions below expect the two half constants
; (0xbc00 is -1.0, 0xd400 is -64.0) to be selected per lane, repacked into one
; register, and consumed by a single v_pk_fma_f16 for every tested subtarget and
; for both instruction selectors.
6304 define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6305 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6306 ; GFX9-SDAG: ; %bb.0:
6307 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6308 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xd400
6309 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0xbc00
6310 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6311 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6312 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6313 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6314 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6315 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6316 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6318 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6319 ; GFX9-GISEL: ; %bb.0:
6320 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6321 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
6322 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xd400
6323 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6324 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6325 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6326 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6327 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6328 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6329 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6330 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6332 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6333 ; GFX10-SDAG: ; %bb.0:
6334 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6335 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xbc00
6336 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6337 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v4, vcc_lo
6338 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6339 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v4, vcc_lo
6340 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6341 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6342 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6344 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6345 ; GFX10-GISEL: ; %bb.0:
6346 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6347 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
6348 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6349 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo
6350 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6351 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6352 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo
6353 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6354 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6355 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6357 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6358 ; GFX11-SDAG: ; %bb.0:
6359 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6360 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0xbc00
6361 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6362 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xd400, v4, vcc_lo
6363 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6364 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v4, vcc_lo
6365 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6366 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6367 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6369 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
6370 ; GFX11-GISEL: ; %bb.0:
6371 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6372 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
6373 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6374 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo
6375 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6376 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6377 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo
6378 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6379 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6380 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test - lane-wise compare of %arg against zero, select of the splat
; multiplier (-1.0 if true, -64.0 if false), contractable fmul then fadd.
6381 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6382 %select.pow2 = select <2 x i1> %cond, <2 x half> <half -1.0, half -1.0>, <2 x half> <half -64.0, half -64.0>
6383 %mul = fmul contract <2 x half> %x, %select.pow2
6384 %fma = fadd contract <2 x half> %mul, %y
; Packed-half multiply-add with a select between two positive powers of two.
; A lane of %x is scaled by 128.0 when the matching lane of %arg equals zero
; and by 64.0 otherwise, then %y is added; fmul and fadd are both `contract`.
; The assertions below expect the half constants (0x5800 is 128.0, 0x5400 is
; 64.0) to be selected per lane, repacked, and fed to one v_pk_fma_f16 on every
; tested subtarget and for both instruction selectors.
6388 define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6389 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64:
6390 ; GFX9-SDAG: ; %bb.0:
6391 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6392 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5400
6393 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5800
6394 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6395 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6396 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6397 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6398 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6399 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6400 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6402 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
6403 ; GFX9-GISEL: ; %bb.0:
6404 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6405 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
6406 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400
6407 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6408 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6409 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6410 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6411 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6412 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6413 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6414 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6416 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64:
6417 ; GFX10-SDAG: ; %bb.0:
6418 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6419 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
6420 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6421 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v4, vcc_lo
6422 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6423 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v4, vcc_lo
6424 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6425 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6426 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6428 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
6429 ; GFX10-GISEL: ; %bb.0:
6430 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
6432 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6433 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
6434 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6435 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6436 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
6437 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6438 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6439 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6441 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64:
6442 ; GFX11-SDAG: ; %bb.0:
6443 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6444 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
6445 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6446 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5400, v4, vcc_lo
6447 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6448 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v4, vcc_lo
6449 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6450 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6451 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6453 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
6454 ; GFX11-GISEL: ; %bb.0:
6455 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6456 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
6457 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6458 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
6459 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6460 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6461 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
6462 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6463 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6464 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test - lane-wise compare of %arg against zero, select of the splat
; multiplier (128.0 if true, 64.0 if false), contractable fmul then fadd.
6465 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6466 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 64.0, half 64.0>
6467 %mul = fmul contract <2 x half> %x, %select.pow2
6468 %fma = fadd contract <2 x half> %mul, %y
; Packed-half multiply-add with a select between 128.0 and 4.0.
; A lane of %x is scaled by 128.0 when the matching lane of %arg equals zero
; and by 4.0 otherwise, then %y is added; fmul and fadd are both `contract`.
; The assertions below expect the half constants (0x5800 is 128.0, 0x4400 is
; 4.0) to be selected per lane, repacked, and fed to one v_pk_fma_f16 on every
; tested subtarget and for both instruction selectors.
6472 define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6473 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4:
6474 ; GFX9-SDAG: ; %bb.0:
6475 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6476 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400
6477 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5800
6478 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6479 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6480 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6481 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6482 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6483 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6484 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6486 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
6487 ; GFX9-GISEL: ; %bb.0:
6488 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6489 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
6490 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400
6491 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6492 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6493 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6494 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6495 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6496 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6497 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6498 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6500 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4:
6501 ; GFX10-SDAG: ; %bb.0:
6502 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6503 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
6504 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6505 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
6506 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6507 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v4, vcc_lo
6508 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6509 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6510 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6512 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
6513 ; GFX10-GISEL: ; %bb.0:
6514 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
6516 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6517 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
6518 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6519 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6520 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
6521 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6522 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6523 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6525 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4:
6526 ; GFX11-SDAG: ; %bb.0:
6527 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6528 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
6529 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6530 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
6531 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6532 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v4, vcc_lo
6533 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6534 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6535 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6537 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
6538 ; GFX11-GISEL: ; %bb.0:
6539 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6540 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
6541 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6542 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
6543 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6544 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6545 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
6546 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6547 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6548 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test - lane-wise compare of %arg against zero, select of the splat
; multiplier (128.0 if true, 4.0 if false), contractable fmul then fadd.
6549 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6550 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 4.0, half 4.0>
6551 %mul = fmul contract <2 x half> %x, %select.pow2
6552 %fma = fadd contract <2 x half> %mul, %y
; Packed-half multiply-add with a select between 2.0 and 4.0.
; A lane of %x is scaled by 2.0 when the matching lane of %arg equals zero and
; by 4.0 otherwise, then %y is added; fmul and fadd are both `contract`.
; The assertions below expect the half constants (0x4000 is 2.0, 0x4400 is
; 4.0) to be selected per lane, repacked, and fed to one v_pk_fma_f16 on every
; tested subtarget and for both instruction selectors.
6556 define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6557 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4:
6558 ; GFX9-SDAG: ; %bb.0:
6559 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6560 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400
6561 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
6562 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6563 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6564 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6565 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6566 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6567 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6568 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6570 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
6571 ; GFX9-GISEL: ; %bb.0:
6572 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6573 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4000
6574 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400
6575 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6576 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6577 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6578 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6579 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6580 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6581 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6582 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6584 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4:
6585 ; GFX10-SDAG: ; %bb.0:
6586 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6587 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000
6588 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6589 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
6590 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6591 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v4, vcc_lo
6592 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6593 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6594 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6596 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
6597 ; GFX10-GISEL: ; %bb.0:
6598 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6599 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
6600 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6601 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo
6602 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6603 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6604 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo
6605 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6606 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6607 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6609 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4:
6610 ; GFX11-SDAG: ; %bb.0:
6611 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6612 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000
6613 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6614 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
6615 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6616 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x4400, v4, vcc_lo
6617 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6618 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6619 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6621 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
6622 ; GFX11-GISEL: ; %bb.0:
6623 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6624 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
6625 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6626 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo
6627 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6628 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6629 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo
6630 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6631 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6632 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test - lane-wise compare of %arg against zero, select of the splat
; multiplier (2.0 if true, 4.0 if false), contractable fmul then fadd.
6633 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6634 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 4.0, half 4.0>
6635 %mul = fmul contract <2 x half> %x, %select.pow2
6636 %fma = fadd contract <2 x half> %mul, %y
; Packed-half multiply-add with a select between 4.0 and 128.0.
; A lane of %x is scaled by 4.0 when the matching lane of %arg equals zero and
; by 128.0 otherwise, then %y is added; fmul and fadd are both `contract`.
; The assertions below expect the half constants (0x4400 is 4.0, 0x5800 is
; 128.0) to be selected per lane, repacked, and fed to one v_pk_fma_f16 on
; every tested subtarget and for both instruction selectors.
6640 define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x half> %x, <2 x half> %y) {
6641 ; GFX9-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128:
6642 ; GFX9-SDAG: ; %bb.0:
6643 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6644 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x5800
6645 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x4400
6646 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6647 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
6648 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6649 ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
6650 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6651 ; GFX9-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6652 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6654 ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
6655 ; GFX9-GISEL: ; %bb.0:
6656 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6657 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
6658 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5800
6659 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
6660 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
6661 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
6662 ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
6663 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6664 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6665 ; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6666 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6668 ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128:
6669 ; GFX10-SDAG: ; %bb.0:
6670 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6671 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400
6672 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6673 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5800, v4, vcc_lo
6674 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6675 ; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5800, v4, vcc_lo
6676 ; GFX10-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6677 ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6678 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6680 ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
6681 ; GFX10-GISEL: ; %bb.0:
6682 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6683 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
6684 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6685 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo
6686 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6687 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6688 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo
6689 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6690 ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6691 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6693 ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128:
6694 ; GFX11-SDAG: ; %bb.0:
6695 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6696 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400
6697 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6698 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x5800, v4, vcc_lo
6699 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6700 ; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5800, v4, vcc_lo
6701 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6702 ; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6703 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6705 ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
6706 ; GFX11-GISEL: ; %bb.0:
6707 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6708 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
6709 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
6710 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo
6711 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
6712 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6713 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo
6714 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
6715 ; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
6716 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test - lane-wise compare of %arg against zero, select of the splat
; multiplier (4.0 if true, 128.0 if false), contractable fmul then fadd.
6717 %cond = icmp eq <2 x i32> %arg, zeroinitializer
6718 %select.pow2 = select <2 x i1> %cond, <2 x half> <half 4.0, half 4.0>, <2 x half> <half 128.0, half 128.0>
6719 %mul = fmul contract <2 x half> %x, %select.pow2
6720 %fma = fadd contract <2 x half> %mul, %y
6724 ;---------------------------------------------------------------------
6726 ;---------------------------------------------------------------------
; Constrained f32 multiply of %x by 32.0 using dynamic rounding and strict
; exception semantics. The shared assertions expect an ordinary v_mul_f32 with
; the literal 0x42000000 (32.0 as f32) on every tested configuration.
; %y is not referenced by the visible body.
6728 define float @v_constrained_fmul_32_f32(float %x, float %y) #0 {
6729 ; GCN-LABEL: v_constrained_fmul_32_f32:
6731 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6732 ; GCN-NEXT: v_mul_f32_e32 v0, 0x42000000, v0
6733 ; GCN-NEXT: s_setpc_b64 s[30:31]
6734 %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float 32.0, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Constrained f64 multiply of %x by 32.0 using dynamic rounding and strict
; exception semantics. The two selectors are expected to differ here. The
; SelectionDAG assertions expect v_ldexp_f64 with an exponent of 5, while the
; GlobalISel assertions expect v_mul_f64 by a constant whose high dword is
; 0x40400000 (32.0 as f64). %y is not referenced by the visible body.
6738 define double @v_constrained_fmul_32_f64(double %x, double %y) #0 {
6739 ; GFX9-SDAG-LABEL: v_constrained_fmul_32_f64:
6740 ; GFX9-SDAG: ; %bb.0:
6741 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6742 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
6743 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6745 ; GFX9-GISEL-LABEL: v_constrained_fmul_32_f64:
6746 ; GFX9-GISEL: ; %bb.0:
6747 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6748 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
6749 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000
6750 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
6751 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6753 ; GFX10-SDAG-LABEL: v_constrained_fmul_32_f64:
6754 ; GFX10-SDAG: ; %bb.0:
6755 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6756 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
6757 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6759 ; GFX10-GISEL-LABEL: v_constrained_fmul_32_f64:
6760 ; GFX10-GISEL: ; %bb.0:
6761 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6762 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40400000
6763 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6765 ; GFX11-SDAG-LABEL: v_constrained_fmul_32_f64:
6766 ; GFX11-SDAG: ; %bb.0:
6767 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6768 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5
6769 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6771 ; GFX11-GISEL-LABEL: v_constrained_fmul_32_f64:
6772 ; GFX11-GISEL: ; %bb.0:
6773 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6774 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40400000
6775 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
6776 %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 32.0, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Constrained f64 multiply of %x by 18446744073709551616.0 (2^64, per the
; function name) using dynamic rounding and strict exception semantics. The
; SelectionDAG assertions expect v_ldexp_f64 with an exponent of 64, while the
; GlobalISel assertions expect v_mul_f64 by a constant whose high dword is
; 0x43f00000. %y is not referenced by the visible body.
6780 define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 {
6781 ; GFX9-SDAG-LABEL: v_constrained_fmul_0x1p64_f64:
6782 ; GFX9-SDAG: ; %bb.0:
6783 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6784 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
6785 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6787 ; GFX9-GISEL-LABEL: v_constrained_fmul_0x1p64_f64:
6788 ; GFX9-GISEL: ; %bb.0:
6789 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6790 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
6791 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43f00000
6792 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
6793 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6795 ; GFX10-SDAG-LABEL: v_constrained_fmul_0x1p64_f64:
6796 ; GFX10-SDAG: ; %bb.0:
6797 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6798 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
6799 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6801 ; GFX10-GISEL-LABEL: v_constrained_fmul_0x1p64_f64:
6802 ; GFX10-GISEL: ; %bb.0:
6803 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6804 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x43f00000
6805 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6807 ; GFX11-SDAG-LABEL: v_constrained_fmul_0x1p64_f64:
6808 ; GFX11-SDAG: ; %bb.0:
6809 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6810 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64
6811 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6813 ; GFX11-GISEL-LABEL: v_constrained_fmul_0x1p64_f64:
6814 ; GFX11-GISEL: ; %bb.0:
6815 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6816 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x43f00000
6817 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
6818 %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 18446744073709551616.0, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Constrained f16 multiply of %x by 32.0 using dynamic rounding and strict
; exception semantics. The shared assertions expect an ordinary v_mul_f16 with
; the literal 0x5000 (32.0 as half) on every tested configuration.
; %y is not referenced by the visible body.
6822 define half @v_constrained_fmul_32_f16(half %x, half %y) #0 {
6823 ; GCN-LABEL: v_constrained_fmul_32_f16:
6825 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6826 ; GCN-NEXT: v_mul_f16_e32 v0, 0x5000, v0
6827 ; GCN-NEXT: s_setpc_b64 s[30:31]
6828 %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half 32.0, metadata !"round.dynamic", metadata !"fpexcept.strict")
; f64 multiply of fabs(%x) by 4.34584737989687770135e-311, a denormal constant
; (2^-1031 per the function name; its high dword is 0x800 in the GlobalISel
; checks). The fabs is expected to fold into the |v[0:1]| source modifier in
; every configuration. The SelectionDAG assertions expect v_ldexp_f64 with an
; exponent of -1031 (0xfbf9 via s_movk_i32 on gfx900, the literal 0xfffffbf9 on
; the newer targets); the GlobalISel assertions expect a plain v_mul_f64.
6832 define double @v_mul_fabs_0x1pn1031_f64(double %x) {
6833 ; GFX9-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
6834 ; GFX9-SDAG: ; %bb.0:
6835 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6836 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfbf9
6837 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, s4
6838 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6840 ; GFX9-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
6841 ; GFX9-GISEL: ; %bb.0:
6842 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6843 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
6844 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x800
6845 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
6846 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6848 ; GFX10-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
6849 ; GFX10-SDAG: ; %bb.0:
6850 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6851 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 0xfffffbf9
6852 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6854 ; GFX10-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
6855 ; GFX10-GISEL: ; %bb.0:
6856 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6857 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, |v[0:1]|
6858 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6860 ; GFX11-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
6861 ; GFX11-SDAG: ; %bb.0:
6862 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6863 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 0xfffffbf9
6864 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6866 ; GFX11-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
6867 ; GFX11-GISEL: ; %bb.0:
6868 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6869 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, |v[0:1]|
6870 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
6871 %fabs.x = call double @llvm.fabs.f64(double %x)
6872 %mul = fmul double %fabs.x, 4.34584737989687770135e-311
; f64 multiply of fabs(%x) by -256.0. The SelectionDAG assertions expect the
; negation and fabs to fold into the -|v[0:1]| source modifier of a v_ldexp_f64
; with an exponent of 8. The GlobalISel assertions expect v_mul_f64 of |v[0:1]|
; by a constant whose high dword is 0xc0700000 (-256.0 as f64).
6876 define double @v_mul_fabs_neg256_f64(double %x) {
6877 ; GFX9-SDAG-LABEL: v_mul_fabs_neg256_f64:
6878 ; GFX9-SDAG: ; %bb.0:
6879 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6880 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
6881 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6883 ; GFX9-GISEL-LABEL: v_mul_fabs_neg256_f64:
6884 ; GFX9-GISEL: ; %bb.0:
6885 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6886 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
6887 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0700000
6888 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
6889 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6891 ; GFX10-SDAG-LABEL: v_mul_fabs_neg256_f64:
6892 ; GFX10-SDAG: ; %bb.0:
6893 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6894 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
6895 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6897 ; GFX10-GISEL-LABEL: v_mul_fabs_neg256_f64:
6898 ; GFX10-GISEL: ; %bb.0:
6899 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6900 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, |v[0:1]|
6901 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6903 ; GFX11-SDAG-LABEL: v_mul_fabs_neg256_f64:
6904 ; GFX11-SDAG: ; %bb.0:
6905 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6906 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
6907 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6909 ; GFX11-GISEL-LABEL: v_mul_fabs_neg256_f64:
6910 ; GFX11-GISEL: ; %bb.0:
6911 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6912 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, |v[0:1]|
6913 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
6914 %fabs.x = call double @llvm.fabs.f64(double %x)
6915 %mul = fmul double %fabs.x, -256.0
; f64 multiply of fabs(%x) by -8.0. The SelectionDAG assertions expect the
; negation and fabs to fold into the -|v[0:1]| source modifier of a v_ldexp_f64
; with an exponent of 3. The GlobalISel assertions expect v_mul_f64 of |v[0:1]|
; by a constant whose high dword is 0xc0200000 (-8.0 as f64).
6919 define double @v_mul_fabs_neg8_f64(double %x) {
6920 ; GFX9-SDAG-LABEL: v_mul_fabs_neg8_f64:
6921 ; GFX9-SDAG: ; %bb.0:
6922 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6923 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
6924 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6926 ; GFX9-GISEL-LABEL: v_mul_fabs_neg8_f64:
6927 ; GFX9-GISEL: ; %bb.0:
6928 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6929 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
6930 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0200000
6931 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
6932 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6934 ; GFX10-SDAG-LABEL: v_mul_fabs_neg8_f64:
6935 ; GFX10-SDAG: ; %bb.0:
6936 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6937 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
6938 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6940 ; GFX10-GISEL-LABEL: v_mul_fabs_neg8_f64:
6941 ; GFX10-GISEL: ; %bb.0:
6942 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6943 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, |v[0:1]|
6944 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6946 ; GFX11-SDAG-LABEL: v_mul_fabs_neg8_f64:
6947 ; GFX11-SDAG: ; %bb.0:
6948 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6949 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
6950 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
6952 ; GFX11-GISEL-LABEL: v_mul_fabs_neg8_f64:
6953 ; GFX11-GISEL: ; %bb.0:
6954 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6955 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, |v[0:1]|
6956 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
6957 %fabs.x = call double @llvm.fabs.f64(double %x)
6958 %mul = fmul double %fabs.x, -8.0
; f64 multiply of fabs(%x) by -4.0. The shared assertions expect a single
; v_mul_f64 of |v[0:1]| by -4.0 written directly as an operand on every tested
; configuration, i.e. no ldexp form is expected for this constant.
6962 define double @v_mul_fabs_neg4_f64(double %x) {
6963 ; GCN-LABEL: v_mul_fabs_neg4_f64:
6965 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6966 ; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -4.0
6967 ; GCN-NEXT: s_setpc_b64 s[30:31]
6968 %fabs.x = call double @llvm.fabs.f64(double %x)
6969 %mul = fmul double %fabs.x, -4.0
; f64 multiply of fabs(%x) by -2.0. The shared assertions expect a single
; v_mul_f64 of |v[0:1]| by -2.0 written directly as an operand on every tested
; configuration, i.e. no ldexp form is expected for this constant.
6973 define double @v_mul_fabs_neg2_f64(double %x) {
6974 ; GCN-LABEL: v_mul_fabs_neg2_f64:
6976 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6977 ; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -2.0
6978 ; GCN-NEXT: s_setpc_b64 s[30:31]
6979 %fabs.x = call double @llvm.fabs.f64(double %x)
6980 %mul = fmul double %fabs.x, -2.0
; f64 multiply of fabs(%x) by -1.0. The SelectionDAG assertions expect the
; whole operation to reduce to setting the sign bit of the high dword
; (v_or_b32 of v1 with 0x80000000). The GlobalISel assertions expect a
; v_mul_f64 of |v[0:1]| by -1.0.
6984 define double @v_mul_fabs_neg1_f64(double %x) {
6985 ; GFX9-SDAG-LABEL: v_mul_fabs_neg1_f64:
6986 ; GFX9-SDAG: ; %bb.0:
6987 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6988 ; GFX9-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
6989 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6991 ; GFX9-GISEL-LABEL: v_mul_fabs_neg1_f64:
6992 ; GFX9-GISEL: ; %bb.0:
6993 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6994 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
6995 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6997 ; GFX10-SDAG-LABEL: v_mul_fabs_neg1_f64:
6998 ; GFX10-SDAG: ; %bb.0:
6999 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7000 ; GFX10-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
7001 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
7003 ; GFX10-GISEL-LABEL: v_mul_fabs_neg1_f64:
7004 ; GFX10-GISEL: ; %bb.0:
7005 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7006 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
7007 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
7009 ; GFX11-SDAG-LABEL: v_mul_fabs_neg1_f64:
7010 ; GFX11-SDAG: ; %bb.0:
7011 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7012 ; GFX11-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
7013 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
7015 ; GFX11-GISEL-LABEL: v_mul_fabs_neg1_f64:
7016 ; GFX11-GISEL: ; %bb.0:
7017 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7018 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
7019 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
7020 %fabs.x = call double @llvm.fabs.f64(double %x)
7021 %mul = fmul double %fabs.x, -1.0
; f64 case of fabs(x) * -0.5.
; All run lines share the GCN checks here. They expect one v_mul_f64 with the
; fabs folded as |v[0:1]| and -0.5 used directly as the second operand.
7025 define double @v_mul_fabs_neghalf_f64(double %x) {
7026 ; GCN-LABEL: v_mul_fabs_neghalf_f64:
7028 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7029 ; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -0.5
7030 ; GCN-NEXT: s_setpc_b64 s[30:31]
7031 %fabs.x = call double @llvm.fabs.f64(double %x)
7032 %mul = fmul double %fabs.x, -0.5
; f64 case of fabs(x) * -0.25.
;  - SelectionDAG (gfx900, gfx1030, gfx1100) expects the multiply to become
;    v_ldexp_f64 with exponent -2, with both the negation and the fabs folded
;    into the -|v[0:1]| source modifier.
;  - GlobalISel keeps a v_mul_f64. On gfx900 the constant is first built in
;    v[2:3] by two v_mov_b32 (low word 0, high word 0xbfd00000). On gfx1030 and
;    gfx1100 the value 0xbfd00000 appears as a literal operand of the multiply.
7036 define double @v_mul_fabs_negquarter_f64(double %x) {
7037 ; GFX9-SDAG-LABEL: v_mul_fabs_negquarter_f64:
7038 ; GFX9-SDAG: ; %bb.0:
7039 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7040 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
7041 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
7043 ; GFX9-GISEL-LABEL: v_mul_fabs_negquarter_f64:
7044 ; GFX9-GISEL: ; %bb.0:
7045 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7046 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
7047 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbfd00000
7048 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
7049 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
7051 ; GFX10-SDAG-LABEL: v_mul_fabs_negquarter_f64:
7052 ; GFX10-SDAG: ; %bb.0:
7053 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7054 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
7055 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
7057 ; GFX10-GISEL-LABEL: v_mul_fabs_negquarter_f64:
7058 ; GFX10-GISEL: ; %bb.0:
7059 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7060 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, |v[0:1]|
7061 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
7063 ; GFX11-SDAG-LABEL: v_mul_fabs_negquarter_f64:
7064 ; GFX11-SDAG: ; %bb.0:
7065 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7066 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
7067 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
7069 ; GFX11-GISEL-LABEL: v_mul_fabs_negquarter_f64:
7070 ; GFX11-GISEL: ; %bb.0:
7071 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7072 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, |v[0:1]|
7073 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
7074 %fabs.x = call double @llvm.fabs.f64(double %x)
7075 %mul = fmul double %fabs.x, -0.25
; f64 case of fabs(x) * 0.25.
;  - SelectionDAG (gfx900, gfx1030, gfx1100) expects v_ldexp_f64 with exponent
;    -2 and the fabs folded as the |v[0:1]| source modifier.
;  - GlobalISel keeps a v_mul_f64. On gfx900 the constant is first built in
;    v[2:3] by two v_mov_b32 (low word 0, high word 0x3fd00000). On gfx1030 and
;    gfx1100 the value 0x3fd00000 appears as a literal operand of the multiply.
7079 define double @v_mul_fabs_quarter_f64(double %x) {
7080 ; GFX9-SDAG-LABEL: v_mul_fabs_quarter_f64:
7081 ; GFX9-SDAG: ; %bb.0:
7082 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7083 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
7084 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
7086 ; GFX9-GISEL-LABEL: v_mul_fabs_quarter_f64:
7087 ; GFX9-GISEL: ; %bb.0:
7088 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7089 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
7090 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fd00000
7091 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
7092 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
7094 ; GFX10-SDAG-LABEL: v_mul_fabs_quarter_f64:
7095 ; GFX10-SDAG: ; %bb.0:
7096 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7097 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
7098 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
7100 ; GFX10-GISEL-LABEL: v_mul_fabs_quarter_f64:
7101 ; GFX10-GISEL: ; %bb.0:
7102 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7103 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, |v[0:1]|
7104 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
7106 ; GFX11-SDAG-LABEL: v_mul_fabs_quarter_f64:
7107 ; GFX11-SDAG: ; %bb.0:
7108 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7109 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
7110 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
7112 ; GFX11-GISEL-LABEL: v_mul_fabs_quarter_f64:
7113 ; GFX11-GISEL: ; %bb.0:
7114 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7115 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, |v[0:1]|
7116 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
7117 %fabs.x = call double @llvm.fabs.f64(double %x)
7118 %mul = fmul double %fabs.x, 0.25
; f64 case of fabs(x) * 0.5.
; All run lines share the GCN checks here. They expect one v_mul_f64 with the
; fabs folded as |v[0:1]| and 0.5 used directly as the second operand.
7122 define double @v_mul_fabs_half_f64(double %x) {
7123 ; GCN-LABEL: v_mul_fabs_half_f64:
7125 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7126 ; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0.5
7127 ; GCN-NEXT: s_setpc_b64 s[30:31]
7128 %fabs.x = call double @llvm.fabs.f64(double %x)
7129 %mul = fmul double %fabs.x, 0.5
; f64 case of fabs(x) * 1.0.
; All run lines share the GCN checks here. No multiply is expected. The only
; instruction is a v_and_b32 that masks v1, the upper register of the v[0:1]
; result pair, with 0x7fffffff.
7133 define double @v_mul_fabs_1_f64(double %x) {
7134 ; GCN-LABEL: v_mul_fabs_1_f64:
7136 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7137 ; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
7138 ; GCN-NEXT: s_setpc_b64 s[30:31]
7139 %fabs.x = call double @llvm.fabs.f64(double %x)
7140 %mul = fmul double %fabs.x, 1.0
; f64 case of fabs(x) * 2.0.
;  - SelectionDAG (gfx900, gfx1030, gfx1100) expects the multiply to become a
;    v_add_f64 of |v[0:1]| with itself.
;  - GlobalISel on the same three targets expects a v_mul_f64 with the fabs
;    folded as |v[0:1]| and 2.0 as a direct operand.
7144 define double @v_mul_fabs_2_f64(double %x) {
7145 ; GFX9-SDAG-LABEL: v_mul_fabs_2_f64:
7146 ; GFX9-SDAG: ; %bb.0:
7147 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7148 ; GFX9-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
7149 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
7151 ; GFX9-GISEL-LABEL: v_mul_fabs_2_f64:
7152 ; GFX9-GISEL: ; %bb.0:
7153 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7154 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
7155 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
7157 ; GFX10-SDAG-LABEL: v_mul_fabs_2_f64:
7158 ; GFX10-SDAG: ; %bb.0:
7159 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7160 ; GFX10-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
7161 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
7163 ; GFX10-GISEL-LABEL: v_mul_fabs_2_f64:
7164 ; GFX10-GISEL: ; %bb.0:
7165 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7166 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
7167 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
7169 ; GFX11-SDAG-LABEL: v_mul_fabs_2_f64:
7170 ; GFX11-SDAG: ; %bb.0:
7171 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7172 ; GFX11-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
7173 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
7175 ; GFX11-GISEL-LABEL: v_mul_fabs_2_f64:
7176 ; GFX11-GISEL: ; %bb.0:
7177 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7178 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
7179 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
7180 %fabs.x = call double @llvm.fabs.f64(double %x)
7181 %mul = fmul double %fabs.x, 2.0
; f64 case of fabs(x) * 4.0.
; All run lines share the GCN checks here. They expect one v_mul_f64 with the
; fabs folded as |v[0:1]| and 4.0 used directly as the second operand.
7185 define double @v_mul_fabs_4_f64(double %x) {
7186 ; GCN-LABEL: v_mul_fabs_4_f64:
7188 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7189 ; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 4.0
7190 ; GCN-NEXT: s_setpc_b64 s[30:31]
7191 %fabs.x = call double @llvm.fabs.f64(double %x)
7192 %mul = fmul double %fabs.x, 4.0
; f64 case of fabs(x) * 8.0.
;  - SelectionDAG (gfx900, gfx1030, gfx1100) expects v_ldexp_f64 with exponent
;    3 and the fabs folded as the |v[0:1]| source modifier.
;  - GlobalISel keeps a v_mul_f64. On gfx900 the constant is first built in
;    v[2:3] by two v_mov_b32 (low word 0, high word 0x40200000). On gfx1030 and
;    gfx1100 the value 0x40200000 appears as a literal operand of the multiply.
7196 define double @v_mul_fabs_8_f64(double %x) {
7197 ; GFX9-SDAG-LABEL: v_mul_fabs_8_f64:
7198 ; GFX9-SDAG: ; %bb.0:
7199 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7200 ; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
7201 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
7203 ; GFX9-GISEL-LABEL: v_mul_fabs_8_f64:
7204 ; GFX9-GISEL: ; %bb.0:
7205 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7206 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
7207 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40200000
7208 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3]
7209 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
7211 ; GFX10-SDAG-LABEL: v_mul_fabs_8_f64:
7212 ; GFX10-SDAG: ; %bb.0:
7213 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7214 ; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
7215 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
7217 ; GFX10-GISEL-LABEL: v_mul_fabs_8_f64:
7218 ; GFX10-GISEL: ; %bb.0:
7219 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7220 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, |v[0:1]|
7221 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
7223 ; GFX11-SDAG-LABEL: v_mul_fabs_8_f64:
7224 ; GFX11-SDAG: ; %bb.0:
7225 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7226 ; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
7227 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
7229 ; GFX11-GISEL-LABEL: v_mul_fabs_8_f64:
7230 ; GFX11-GISEL: ; %bb.0:
7231 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7232 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, |v[0:1]|
7233 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
7234 %fabs.x = call double @llvm.fabs.f64(double %x)
7235 %mul = fmul double %fabs.x, 8.0
; Declarations of the constrained floating-point multiply intrinsic for half,
; float and double, followed by attribute group #0, which carries strictfp.
; Review note - the functions that call these intrinsics or reference #0 are
; not visible in this part of the file, so confirm they are still used before
; changing or removing anything here.
7239 declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
7240 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
7241 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
7243 attributes #0 = { strictfp }