1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
5 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
6 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
7 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
8 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
9 ; RUN: opt < %s -x86-experimental-vector-widening-legalization -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
11 define i32 @reduce_i64(i32 %arg) {
12 ; SSE2-LABEL: 'reduce_i64'
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
17 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
18 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
20 ; SSSE3-LABEL: 'reduce_i64'
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
25 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
26 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
28 ; SSE42-LABEL: 'reduce_i64'
29 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
30 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
31 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
33 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
34 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
36 ; AVX-LABEL: 'reduce_i64'
37 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
38 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
39 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
40 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
41 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
42 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
44 ; AVX512-LABEL: 'reduce_i64'
45 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
46 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
47 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
48 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
49 ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
50 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
52 %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
53 %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
54 %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
55 %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
56 %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
60 define i32 @reduce_i32(i32 %arg) {
61 ; SSE2-LABEL: 'reduce_i32'
62 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
65 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
66 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
67 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
69 ; SSSE3-LABEL: 'reduce_i32'
70 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
71 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
72 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
73 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
74 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
75 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
77 ; SSE42-LABEL: 'reduce_i32'
78 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
79 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
80 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
81 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
82 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
83 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
85 ; AVX-LABEL: 'reduce_i32'
86 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
87 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
88 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
89 ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
90 ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
91 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
93 ; AVX512-LABEL: 'reduce_i32'
94 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
95 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
96 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
97 ; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
98 ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
99 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
101 %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
102 %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
103 %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
104 %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
105 %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
109 define i32 @reduce_i16(i32 %arg) {
110 ; SSE2-LABEL: 'reduce_i16'
111 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
112 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
113 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
114 ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
115 ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
116 ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
117 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
119 ; SSSE3-LABEL: 'reduce_i16'
120 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
121 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
122 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
123 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
124 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
125 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
126 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
128 ; SSE42-LABEL: 'reduce_i16'
129 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
130 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
131 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
132 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
133 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
134 ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
135 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
137 ; AVX1-LABEL: 'reduce_i16'
138 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
139 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
140 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
142 ; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
143 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
144 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
146 ; AVX2-LABEL: 'reduce_i16'
147 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
148 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
149 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
150 ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
151 ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
152 ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
153 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
155 ; AVX512F-LABEL: 'reduce_i16'
156 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
157 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
158 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
159 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
160 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
161 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
162 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
164 ; AVX512BW-LABEL: 'reduce_i16'
165 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
166 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
167 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
168 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
169 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
170 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
171 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
173 ; AVX512DQ-LABEL: 'reduce_i16'
174 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
175 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
176 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
177 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
178 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
179 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
180 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
182 %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
183 %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
184 %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
185 %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
186 %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
187 %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
191 define i32 @reduce_i8(i32 %arg) {
192 ; SSE2-LABEL: 'reduce_i8'
193 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
194 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
195 ; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
196 ; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
197 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
198 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
199 ; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
200 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
202 ; SSSE3-LABEL: 'reduce_i8'
203 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
204 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
205 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
206 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
207 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
208 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
209 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
210 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
212 ; SSE42-LABEL: 'reduce_i8'
213 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
214 ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
215 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
216 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
217 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
218 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
219 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
220 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
222 ; AVX1-LABEL: 'reduce_i8'
223 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
224 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
225 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
226 ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
227 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
228 ; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
229 ; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
230 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
232 ; AVX2-LABEL: 'reduce_i8'
233 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
234 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
235 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
236 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
237 ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
238 ; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
239 ; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
240 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
242 ; AVX512F-LABEL: 'reduce_i8'
243 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
244 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
245 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
246 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
247 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
248 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
249 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
250 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
252 ; AVX512BW-LABEL: 'reduce_i8'
253 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
254 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
255 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
256 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
257 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
258 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
259 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
260 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
262 ; AVX512DQ-LABEL: 'reduce_i8'
263 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
264 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
265 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
266 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
267 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
268 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
269 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
270 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
272 %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
273 %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
274 %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
275 %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
276 %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
277 %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
278 %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
282 declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>)
283 declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
284 declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
285 declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
286 declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
288 declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>)
289 declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
290 declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
291 declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
292 declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
294 declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>)
295 declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
296 declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
297 declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
298 declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
299 declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
301 declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>)
302 declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>)
303 declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
304 declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
305 declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
306 declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
307 declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)