1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
6 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
7 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
8 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
11 define i32 @reduce_i64(i32 %arg) {
12 ; SSE2-LABEL: 'reduce_i64'
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
17 ; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
18 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
20 ; SSSE3-LABEL: 'reduce_i64'
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
25 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
26 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
28 ; SSE42-LABEL: 'reduce_i64'
29 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
30 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
31 ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
33 ; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
34 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
36 ; AVX1-LABEL: 'reduce_i64'
37 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
41 ; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
42 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
44 ; AVX2-LABEL: 'reduce_i64'
45 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
46 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
47 ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
48 ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
49 ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
50 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
52 ; AVX512F-LABEL: 'reduce_i64'
53 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
54 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
55 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
56 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
57 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
58 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
60 ; AVX512BW-LABEL: 'reduce_i64'
61 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
62 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
63 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
64 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
65 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
66 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
68 ; AVX512DQ-LABEL: 'reduce_i64'
69 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
70 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
71 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
72 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
73 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
74 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
76 %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
77 %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
78 %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
79 %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
80 %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
84 define i32 @reduce_i32(i32 %arg) {
85 ; SSE2-LABEL: 'reduce_i32'
86 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
87 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
88 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
89 ; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
90 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
91 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
93 ; SSSE3-LABEL: 'reduce_i32'
94 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
95 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
96 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
97 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
98 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
99 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
101 ; SSE42-LABEL: 'reduce_i32'
102 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
103 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
104 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
105 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
106 ; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
107 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
109 ; AVX1-LABEL: 'reduce_i32'
110 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
111 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
112 ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
113 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
114 ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
115 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
117 ; AVX2-LABEL: 'reduce_i32'
118 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
119 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
120 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
121 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
122 ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
123 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
125 ; AVX512-LABEL: 'reduce_i32'
126 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
127 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
128 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
129 ; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
130 ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
131 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133 %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
134 %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
135 %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
136 %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
137 %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
141 define i32 @reduce_i16(i32 %arg) {
142 ; SSE-LABEL: 'reduce_i16'
143 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
144 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
145 ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
146 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
147 ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
148 ; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
149 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
151 ; AVX1-LABEL: 'reduce_i16'
152 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160 ; AVX2-LABEL: 'reduce_i16'
161 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
162 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
163 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
164 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
165 ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
166 ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
167 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
169 ; AVX512F-LABEL: 'reduce_i16'
170 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
171 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
172 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
173 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
174 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
175 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
176 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
178 ; AVX512BW-LABEL: 'reduce_i16'
179 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
180 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
181 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
182 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
183 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
184 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
185 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
187 ; AVX512DQ-LABEL: 'reduce_i16'
188 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
189 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
190 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
191 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
192 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
193 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
194 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
196 %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
197 %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
198 %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
199 %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
200 %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
201 %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
205 define i32 @reduce_i8(i32 %arg) {
206 ; SSE-LABEL: 'reduce_i8'
207 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
208 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
209 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
210 ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
211 ; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
212 ; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
213 ; SSE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
214 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216 ; AVX1-LABEL: 'reduce_i8'
217 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
218 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
219 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
220 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
221 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
222 ; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
223 ; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
224 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226 ; AVX2-LABEL: 'reduce_i8'
227 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
228 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
229 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
230 ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
231 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
232 ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
233 ; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
234 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236 ; AVX512F-LABEL: 'reduce_i8'
237 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
238 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
239 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
240 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
241 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
242 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
243 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
244 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246 ; AVX512BW-LABEL: 'reduce_i8'
247 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
248 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
249 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
250 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
251 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
252 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
253 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
254 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
256 ; AVX512DQ-LABEL: 'reduce_i8'
257 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
258 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
259 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
260 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
261 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
262 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
263 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
264 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
266 %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
267 %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
268 %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
269 %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
270 %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
271 %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
272 %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
276 declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>)
277 declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
278 declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
279 declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
280 declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
282 declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
283 declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
284 declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
285 declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
286 declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>)
288 declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
289 declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
290 declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
291 declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
292 declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>)
293 declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>)
295 declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
296 declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
297 declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
298 declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
299 declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
300 declare i8 @llvm.vector.reduce.mul.v64i8(<64 x i8>)
301 declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)