1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
3 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
4 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
6 define void @reduce_fmul_bfloat() {
7 ; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
8 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
9 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
10 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
11 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
12 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
13 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
14 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
15 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
16 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
17 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
18 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
19 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
20 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
21 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
22 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
24 ; SIZE-LABEL: 'reduce_fmul_bfloat'
25 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
26 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
27 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
28 ; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
29 ; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
30 ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
31 ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
32 ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
33 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
34 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
35 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
36 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
37 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
38 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
39 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
41 %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
42 %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
43 %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
44 %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
45 %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
46 %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
47 %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
48 %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
49 %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
50 %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
51 %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
52 %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
53 %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
54 %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
58 define void @reduce_fmul_half() {
59 ; FP-REDUCE-ZVFH-LABEL: 'reduce_fmul_half'
60 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
61 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
62 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
63 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
64 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
65 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
66 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
67 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
68 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
69 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
70 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
71 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
72 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
73 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
74 ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
76 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
77 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
78 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
79 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
80 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
81 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
82 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
83 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
84 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
85 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
86 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
87 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
88 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
89 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
90 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
91 ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
93 ; SIZE-LABEL: 'reduce_fmul_half'
94 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
95 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
96 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
97 ; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
98 ; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
99 ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
100 ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
101 ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
102 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
103 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
104 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
105 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
106 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
107 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
108 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
110 %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
111 %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
112 %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
113 %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
114 %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
115 %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
116 %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
117 %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
118 %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
119 %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
120 %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
121 %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
122 %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
123 %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
127 define void @reduce_fmul_float() {
128 ; FP-REDUCE-LABEL: 'reduce_fmul_float'
129 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
130 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
131 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
132 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
133 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 121 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
134 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
135 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
136 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
137 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
138 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
139 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
140 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
141 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
142 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
144 ; SIZE-LABEL: 'reduce_fmul_float'
145 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
146 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
147 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
148 ; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
149 ; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
150 ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
151 ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
152 ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
153 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
154 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
155 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
156 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
157 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
158 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
160 %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
161 %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
162 %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
163 %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
164 %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
165 %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
166 %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
167 %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
168 %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
169 %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
170 %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
171 %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
172 %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
176 define void @reduce_fmul_double() {
177 ; FP-REDUCE-LABEL: 'reduce_fmul_double'
178 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
179 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
180 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
181 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
182 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 361 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
183 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
184 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
185 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
186 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
187 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
188 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
189 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
190 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
192 ; SIZE-LABEL: 'reduce_fmul_double'
193 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
194 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
195 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
196 ; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
197 ; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
198 ; SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
199 ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
200 ; SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
201 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
202 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
203 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
204 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
205 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
207 %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
208 %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
209 %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
210 %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
211 %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
212 %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
213 %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
214 %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
215 %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
216 %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
217 %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
218 %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
222 define void @reduce_ordered_fmul_bfloat() {
223 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_bfloat'
224 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
225 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
226 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
227 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
228 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
229 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
230 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
231 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
232 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
233 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
234 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
235 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
236 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
237 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
238 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
240 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
241 ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
242 ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
243 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
244 ; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
245 ; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
246 ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
247 ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
248 ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
249 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
250 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
251 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
252 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
253 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
254 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
255 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
257 %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
258 %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
259 %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
260 %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
261 %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
262 %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
263 %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
264 %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
265 %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
266 %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
267 %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
268 %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
269 %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
270 %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
274 define void @reduce_ordered_fmul_half() {
275 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_half'
276 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
277 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
278 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
279 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
280 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
281 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
282 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
283 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
284 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
285 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
286 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
287 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
288 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
289 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
290 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
292 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
293 ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
294 ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
295 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
296 ; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
297 ; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
298 ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
299 ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
300 ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
301 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
302 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
303 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
304 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
305 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
306 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
307 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
309 %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
310 %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
311 %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
312 %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
313 %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
314 %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
315 %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
316 %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
317 %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
318 %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
319 %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
320 %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
321 %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
322 %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
326 define void @reduce_ordered_fmul_float() {
327 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_float'
328 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
329 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
330 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
331 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
332 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
333 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
334 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
335 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
336 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
337 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
338 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
339 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
340 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
341 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
343 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
344 ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
345 ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
346 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
347 ; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
348 ; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
349 ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
350 ; SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
351 ; SIZE-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
352 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
353 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
354 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
355 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
356 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
357 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
359 %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
360 %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
361 %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
362 %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
363 %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
364 %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
365 %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
366 %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
367 %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
368 %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
369 %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
370 %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
371 %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
375 define void @reduce_ordered_fmul_double() {
376 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_double'
377 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
378 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
379 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
380 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
381 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
382 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
383 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
384 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
385 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
386 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
387 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
388 ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
389 ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
391 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
392 ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
393 ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
394 ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
395 ; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
396 ; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
397 ; SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
398 ; SIZE-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
399 ; SIZE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
400 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
401 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
402 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
403 ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
404 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
406 %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
407 %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
408 %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
409 %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
410 %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
411 %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
412 %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
413 %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
414 %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
415 %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
416 %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
417 %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)