1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
4 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
; add_i8: costs reported by the print<cost-model> pass for llvm.vector.reduce.add
; on <1|2|4|8|16 x i8> undef inputs, for the target selected on the lit command
; line at the top of this file.
; The expected-cost lines were autogenerated by utils/update_analyze_test_checks.py
; (see the NOTE on the first line of the file).
6 define void @add_i8() {
7 ; CHECK-LABEL: 'add_i8'
8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
9 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
10 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
11 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
12 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
13 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; Plain i8 reductions, one per vector width (1, 2, 4, 8 and 16 lanes).
15 %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
17 %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
19 %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
21 %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
23 %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; add_i16: costs reported by the print<cost-model> pass for llvm.vector.reduce.add
; producing an i16 result. For each vector width (1, 2, 4, 8, 16 lanes) the input
; is first a zext, then a sext, of an i8 vector; the final group reduces i16
; vectors directly. The extend and the reduction are costed as separate
; instructions in the expected output.
; The expected-cost lines were autogenerated by utils/update_analyze_test_checks.py
; (see the NOTE on the first line of the file).
28 define void @add_i16() {
29 ; CHECK-LABEL: 'add_i16'
30 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
31 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
33 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
34 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
35 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
36 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
37 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
38 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
39 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
40 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
41 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
42 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
43 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
44 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
45 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
46 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
47 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
48 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
49 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
50 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
51 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
52 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
53 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
54 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
55 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; i8 -> i16 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
57 %a0za = zext <1 x i8> undef to <1 x i16>
58 %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
60 %a0sa = sext <1 x i8> undef to <1 x i16>
61 %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
63 %a1za = zext <2 x i8> undef to <2 x i16>
64 %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
66 %a1sa = sext <2 x i8> undef to <2 x i16>
67 %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
69 %a2za = zext <4 x i8> undef to <4 x i16>
70 %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
72 %a2sa = sext <4 x i8> undef to <4 x i16>
73 %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
75 %a3za = zext <8 x i8> undef to <8 x i16>
76 %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
78 %a3sa = sext <8 x i8> undef to <8 x i16>
79 %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
81 %a4za = zext <16 x i8> undef to <16 x i16>
82 %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
84 %a4sa = sext <16 x i8> undef to <16 x i16>
85 %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
; Reductions of i16 vectors with no preceding extend.
87 %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
89 %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
91 %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
93 %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
95 %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; add_i32: costs reported by the print<cost-model> pass for llvm.vector.reduce.add
; producing an i32 result. For each vector width (1, 2, 4, 8, 16 lanes) the input
; is a zext/sext from i8 (%a0*..%a4*), a zext/sext from i16 (%a5*..%a9*), or an
; i32 vector used directly (%a10..%a14). The extend and the reduction are costed
; as separate instructions in the expected output.
; The expected-cost lines were autogenerated by utils/update_analyze_test_checks.py
; (see the NOTE on the first line of the file).
100 define void @add_i32() {
101 ; CHECK-LABEL: 'add_i32'
102 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
103 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
104 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
105 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
106 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
107 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
108 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
109 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
110 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
111 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
112 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
113 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
114 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
115 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
116 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
117 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
118 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
119 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
120 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
121 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
122 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
123 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
124 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
125 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
126 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
127 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
129 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
130 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
131 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
132 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
133 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
134 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
135 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
136 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
137 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
138 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
139 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
140 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
141 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
142 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
143 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
144 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
145 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
146 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
147 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; i8 -> i32 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
149 %a0za = zext <1 x i8> undef to <1 x i32>
150 %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
152 %a0sa = sext <1 x i8> undef to <1 x i32>
153 %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
155 %a1za = zext <2 x i8> undef to <2 x i32>
156 %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
158 %a1sa = sext <2 x i8> undef to <2 x i32>
159 %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
161 %a2za = zext <4 x i8> undef to <4 x i32>
162 %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
164 %a2sa = sext <4 x i8> undef to <4 x i32>
165 %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
167 %a3za = zext <8 x i8> undef to <8 x i32>
168 %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
170 %a3sa = sext <8 x i8> undef to <8 x i32>
171 %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
173 %a4za = zext <16 x i8> undef to <16 x i32>
174 %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
176 %a4sa = sext <16 x i8> undef to <16 x i32>
177 %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
; i16 -> i32 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
179 %a5za = zext <1 x i16> undef to <1 x i32>
180 %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
182 %a5sa = sext <1 x i16> undef to <1 x i32>
183 %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
185 %a6za = zext <2 x i16> undef to <2 x i32>
186 %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
188 %a6sa = sext <2 x i16> undef to <2 x i32>
189 %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
191 %a7za = zext <4 x i16> undef to <4 x i32>
192 %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
194 %a7sa = sext <4 x i16> undef to <4 x i32>
195 %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
197 %a8za = zext <8 x i16> undef to <8 x i32>
198 %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
200 %a8sa = sext <8 x i16> undef to <8 x i32>
201 %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
203 %a9za = zext <16 x i16> undef to <16 x i32>
204 %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
206 %a9sa = sext <16 x i16> undef to <16 x i32>
207 %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
; Reductions of i32 vectors with no preceding extend.
209 %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
211 %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
213 %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
215 %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
217 %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; add_i64: costs reported by the print<cost-model> pass for llvm.vector.reduce.add
; producing an i64 result. For each vector width (1, 2, 4, 8, 16 lanes) the input
; is a zext/sext from i8 (%a0*..%a4*), from i16 (%a5*..%a9*), from i32
; (%a10*..%a14*), or an i64 vector used directly (%a15..%a19). The extend and the
; reduction are costed as separate instructions in the expected output.
; The expected-cost lines were autogenerated by utils/update_analyze_test_checks.py
; (see the NOTE on the first line of the file).
222 define void @add_i64() {
223 ; CHECK-LABEL: 'add_i64'
224 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
225 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
226 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
227 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
228 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
229 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
230 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
231 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
232 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
233 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
234 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
235 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
236 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
237 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
238 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
239 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
240 ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
241 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
242 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
243 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
244 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
245 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
246 ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
247 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
248 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
249 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
250 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
251 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
252 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
253 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
254 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
255 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
257 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
258 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
259 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
260 ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
261 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
262 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
263 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
264 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
265 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
266 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
267 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
268 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
269 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
270 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
271 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
272 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
273 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
274 ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
275 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
276 ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
277 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
278 ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
279 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
280 ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
281 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
282 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
283 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
284 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
285 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
286 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
287 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
288 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
289 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; i8 -> i64 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
291 %a0za = zext <1 x i8> undef to <1 x i64>
292 %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
294 %a0sa = sext <1 x i8> undef to <1 x i64>
295 %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
297 %a1za = zext <2 x i8> undef to <2 x i64>
298 %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
300 %a1sa = sext <2 x i8> undef to <2 x i64>
301 %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
303 %a2za = zext <4 x i8> undef to <4 x i64>
304 %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
306 %a2sa = sext <4 x i8> undef to <4 x i64>
307 %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
309 %a3za = zext <8 x i8> undef to <8 x i64>
310 %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
312 %a3sa = sext <8 x i8> undef to <8 x i64>
313 %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
315 %a4za = zext <16 x i8> undef to <16 x i64>
316 %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
318 %a4sa = sext <16 x i8> undef to <16 x i64>
319 %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
; i16 -> i64 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
321 %a5za = zext <1 x i16> undef to <1 x i64>
322 %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
324 %a5sa = sext <1 x i16> undef to <1 x i64>
325 %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
327 %a6za = zext <2 x i16> undef to <2 x i64>
328 %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
330 %a6sa = sext <2 x i16> undef to <2 x i64>
331 %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
333 %a7za = zext <4 x i16> undef to <4 x i64>
334 %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
336 %a7sa = sext <4 x i16> undef to <4 x i64>
337 %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
339 %a8za = zext <8 x i16> undef to <8 x i64>
340 %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
342 %a8sa = sext <8 x i16> undef to <8 x i64>
343 %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
345 %a9za = zext <16 x i16> undef to <16 x i64>
346 %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
348 %a9sa = sext <16 x i16> undef to <16 x i64>
349 %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
; i32 -> i64 extend (zext variant, then sext variant) feeding the reduction,
; for 1, 2, 4, 8 and 16 lanes.
351 %a10za = zext <1 x i32> undef to <1 x i64>
352 %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
354 %a10sa = sext <1 x i32> undef to <1 x i64>
355 %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
357 %a11za = zext <2 x i32> undef to <2 x i64>
358 %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
360 %a11sa = sext <2 x i32> undef to <2 x i64>
361 %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
363 %a12za = zext <4 x i32> undef to <4 x i64>
364 %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
366 %a12sa = sext <4 x i32> undef to <4 x i64>
367 %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
369 %a13za = zext <8 x i32> undef to <8 x i64>
370 %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
372 %a13sa = sext <8 x i32> undef to <8 x i64>
373 %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
375 %a14za = zext <16 x i32> undef to <16 x i64>
376 %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
378 %a14sa = sext <16 x i32> undef to <16 x i64>
379 %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
; Reductions of i64 vectors with no preceding extend.
381 %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
383 %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
385 %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
387 %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
389 %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; mla_i8: costs reported by the print<cost-model> pass for a vector mul whose
; result feeds llvm.vector.reduce.add, on <1|2|4|8|16 x i8> undef operands.
; The mul and the reduction are costed as separate instructions in the expected
; output.
; The expected-cost lines were autogenerated by utils/update_analyze_test_checks.py
; (see the NOTE on the first line of the file).
394 define void @mla_i8() {
395 ; CHECK-LABEL: 'mla_i8'
396 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
397 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
398 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a1m = mul <2 x i8> undef, undef
399 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
400 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
401 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
402 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
403 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
404 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
405 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
406 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; One mul + reduce.add pair per vector width (1, 2, 4, 8 and 16 lanes).
408 %a0m = mul <1 x i8> undef, undef
409 %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
411 %a1m = mul <2 x i8> undef, undef
412 %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
414 %a2m = mul <4 x i8> undef, undef
415 %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
417 %a3m = mul <8 x i8> undef, undef
418 %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
420 %a4m = mul <16 x i8> undef, undef
421 %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
426 define void @mla_i16() {
427 ; CHECK-LABEL: 'mla_i16'
428 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
429 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
430 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
431 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
432 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
433 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16>
434 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb
435 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
436 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
437 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16>
438 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb
439 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
440 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
441 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16>
442 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb
443 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
444 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
445 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16>
446 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb
447 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
448 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
449 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16>
450 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb
451 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
452 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
453 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16>
454 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb
455 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
456 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
457 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16>
458 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb
459 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
460 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
461 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16>
462 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb
463 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
464 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
465 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16>
466 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb
467 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
468 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef
469 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
470 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a6m = mul <2 x i16> undef, undef
471 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
472 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef
473 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
474 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef
475 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
476 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef
477 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
478 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
480 %a0za = zext <1 x i8> undef to <1 x i16>
481 %a0zb = zext <1 x i8> undef to <1 x i16>
482 %a0zm = mul <1 x i16> %a0za, %a0zb
483 %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
485 %a0sa = sext <1 x i8> undef to <1 x i16>
486 %a0sb = sext <1 x i8> undef to <1 x i16>
487 %a0sm = mul <1 x i16> %a0sa, %a0sb
488 %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
490 %a1za = zext <2 x i8> undef to <2 x i16>
491 %a1zb = zext <2 x i8> undef to <2 x i16>
492 %a1zm = mul <2 x i16> %a1za, %a1zb
493 %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
495 %a1sa = sext <2 x i8> undef to <2 x i16>
496 %a1sb = sext <2 x i8> undef to <2 x i16>
497 %a1sm = mul <2 x i16> %a1sa, %a1sb
498 %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
500 %a2za = zext <4 x i8> undef to <4 x i16>
501 %a2zb = zext <4 x i8> undef to <4 x i16>
502 %a2zm = mul <4 x i16> %a2za, %a2zb
503 %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
505 %a2sa = sext <4 x i8> undef to <4 x i16>
506 %a2sb = sext <4 x i8> undef to <4 x i16>
507 %a2sm = mul <4 x i16> %a2sa, %a2sb
508 %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
510 %a3za = zext <8 x i8> undef to <8 x i16>
511 %a3zb = zext <8 x i8> undef to <8 x i16>
512 %a3zm = mul <8 x i16> %a3za, %a3zb
513 %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
515 %a3sa = sext <8 x i8> undef to <8 x i16>
516 %a3sb = sext <8 x i8> undef to <8 x i16>
517 %a3sm = mul <8 x i16> %a3sa, %a3sb
518 %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
520 %a4za = zext <16 x i8> undef to <16 x i16>
521 %a4zb = zext <16 x i8> undef to <16 x i16>
522 %a4zm = mul <16 x i16> %a4za, %a4zb
523 %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
525 %a4sa = sext <16 x i8> undef to <16 x i16>
526 %a4sb = sext <16 x i8> undef to <16 x i16>
527 %a4sm = mul <16 x i16> %a4sa, %a4sb
528 %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
530 %a5m = mul <1 x i16> undef, undef
531 %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
533 %a6m = mul <2 x i16> undef, undef
534 %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
536 %a7m = mul <4 x i16> undef, undef
537 %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
539 %a8m = mul <8 x i16> undef, undef
540 %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
542 %a9m = mul <16 x i16> undef, undef
543 %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
; mla_i32: cost-model queries for multiply-then-reduce.add patterns whose result is i32.
; Every group in the body builds two operands, multiplies them, and passes the
; product to llvm.vector.reduce.add. Operands are either zext/sext from i8,
; zext/sext from i16, or plain undef i32 vectors, at 1, 2, 4, 8 and 16 lanes.
; The CHECK-NEXT lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_analyze_test_checks.py instead of
; editing the expected costs by hand.
548 define void @mla_i32() {
549 ; CHECK-LABEL: 'mla_i32'
550 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
551 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32>
552 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb
553 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
554 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
555 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32>
556 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb
557 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
558 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
559 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32>
560 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb
561 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
562 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
563 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32>
564 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb
565 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
566 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
567 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32>
568 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb
569 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
570 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
571 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32>
572 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb
573 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
574 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
575 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32>
576 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb
577 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
578 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
579 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32>
580 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb
581 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
582 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
583 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32>
584 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb
585 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
586 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
587 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32>
588 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb
589 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
590 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
591 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32>
592 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb
593 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
594 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
595 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32>
596 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb
597 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
598 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
599 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32>
600 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb
601 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
602 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
603 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32>
604 ; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb
605 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
606 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
607 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32>
608 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb
609 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
610 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
611 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32>
612 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb
613 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
614 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
615 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32>
616 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb
617 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
618 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
619 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32>
620 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb
621 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
622 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
623 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32>
624 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb
625 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
626 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
627 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32>
628 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb
629 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
630 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef
631 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
632 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a11m = mul <2 x i32> undef, undef
633 ; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
634 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef
635 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
636 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef
637 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
638 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef
639 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
640 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; --- Section 1: operands extended from i8 to i32 (a0..a4) ---
; Each lane count has a zext group (suffix z) followed by a sext group (suffix s).
; 1 lane, zero-extended operands
642 %a0za = zext <1 x i8> undef to <1 x i32>
643 %a0zb = zext <1 x i8> undef to <1 x i32>
644 %a0zm = mul <1 x i32> %a0za, %a0zb
645 %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
; 1 lane, sign-extended operands
647 %a0sa = sext <1 x i8> undef to <1 x i32>
648 %a0sb = sext <1 x i8> undef to <1 x i32>
649 %a0sm = mul <1 x i32> %a0sa, %a0sb
650 %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
; 2 lanes, zero-extended operands
652 %a1za = zext <2 x i8> undef to <2 x i32>
653 %a1zb = zext <2 x i8> undef to <2 x i32>
654 %a1zm = mul <2 x i32> %a1za, %a1zb
655 %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
; 2 lanes, sign-extended operands
657 %a1sa = sext <2 x i8> undef to <2 x i32>
658 %a1sb = sext <2 x i8> undef to <2 x i32>
659 %a1sm = mul <2 x i32> %a1sa, %a1sb
660 %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
; 4 lanes, zero-extended operands
662 %a2za = zext <4 x i8> undef to <4 x i32>
663 %a2zb = zext <4 x i8> undef to <4 x i32>
664 %a2zm = mul <4 x i32> %a2za, %a2zb
665 %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
; 4 lanes, sign-extended operands
667 %a2sa = sext <4 x i8> undef to <4 x i32>
668 %a2sb = sext <4 x i8> undef to <4 x i32>
669 %a2sm = mul <4 x i32> %a2sa, %a2sb
670 %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
; 8 lanes, zero-extended operands
672 %a3za = zext <8 x i8> undef to <8 x i32>
673 %a3zb = zext <8 x i8> undef to <8 x i32>
674 %a3zm = mul <8 x i32> %a3za, %a3zb
675 %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
; 8 lanes, sign-extended operands
677 %a3sa = sext <8 x i8> undef to <8 x i32>
678 %a3sb = sext <8 x i8> undef to <8 x i32>
679 %a3sm = mul <8 x i32> %a3sa, %a3sb
680 %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
; 16 lanes, zero-extended operands
682 %a4za = zext <16 x i8> undef to <16 x i32>
683 %a4zb = zext <16 x i8> undef to <16 x i32>
684 %a4zm = mul <16 x i32> %a4za, %a4zb
685 %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
; 16 lanes, sign-extended operands
687 %a4sa = sext <16 x i8> undef to <16 x i32>
688 %a4sb = sext <16 x i8> undef to <16 x i32>
689 %a4sm = mul <16 x i32> %a4sa, %a4sb
690 %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
; --- Section 2: operands extended from i16 to i32 (a5..a9) ---
; Same layout as section 1: zext group, then sext group, for each lane count.
; 1 lane, zero-extended operands
692 %a5za = zext <1 x i16> undef to <1 x i32>
693 %a5zb = zext <1 x i16> undef to <1 x i32>
694 %a5zm = mul <1 x i32> %a5za, %a5zb
695 %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
; 1 lane, sign-extended operands
697 %a5sa = sext <1 x i16> undef to <1 x i32>
698 %a5sb = sext <1 x i16> undef to <1 x i32>
699 %a5sm = mul <1 x i32> %a5sa, %a5sb
700 %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
; 2 lanes, zero-extended operands
702 %a6za = zext <2 x i16> undef to <2 x i32>
703 %a6zb = zext <2 x i16> undef to <2 x i32>
704 %a6zm = mul <2 x i32> %a6za, %a6zb
705 %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
; 2 lanes, sign-extended operands
707 %a6sa = sext <2 x i16> undef to <2 x i32>
708 %a6sb = sext <2 x i16> undef to <2 x i32>
709 %a6sm = mul <2 x i32> %a6sa, %a6sb
710 %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
; 4 lanes, zero-extended operands
712 %a7za = zext <4 x i16> undef to <4 x i32>
713 %a7zb = zext <4 x i16> undef to <4 x i32>
714 %a7zm = mul <4 x i32> %a7za, %a7zb
715 %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
; 4 lanes, sign-extended operands
717 %a7sa = sext <4 x i16> undef to <4 x i32>
718 %a7sb = sext <4 x i16> undef to <4 x i32>
719 %a7sm = mul <4 x i32> %a7sa, %a7sb
720 %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
; 8 lanes, zero-extended operands
722 %a8za = zext <8 x i16> undef to <8 x i32>
723 %a8zb = zext <8 x i16> undef to <8 x i32>
724 %a8zm = mul <8 x i32> %a8za, %a8zb
725 %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
; 8 lanes, sign-extended operands
727 %a8sa = sext <8 x i16> undef to <8 x i32>
728 %a8sb = sext <8 x i16> undef to <8 x i32>
729 %a8sm = mul <8 x i32> %a8sa, %a8sb
730 %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
; 16 lanes, zero-extended operands
732 %a9za = zext <16 x i16> undef to <16 x i32>
733 %a9zb = zext <16 x i16> undef to <16 x i32>
734 %a9zm = mul <16 x i32> %a9za, %a9zb
735 %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
; 16 lanes, sign-extended operands
737 %a9sa = sext <16 x i16> undef to <16 x i32>
738 %a9sb = sext <16 x i16> undef to <16 x i32>
739 %a9sm = mul <16 x i32> %a9sa, %a9sb
740 %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
; --- Section 3: i32 operands with no extension (a10..a14) ---
; The mul takes undef i32 vectors directly, one group per lane count (1, 2, 4, 8, 16).
742 %a10m = mul <1 x i32> undef, undef
743 %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
745 %a11m = mul <2 x i32> undef, undef
746 %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
748 %a12m = mul <4 x i32> undef, undef
749 %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
751 %a13m = mul <8 x i32> undef, undef
752 %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
754 %a14m = mul <16 x i32> undef, undef
755 %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
760 define void @mla_i64() {
761 ; CHECK-LABEL: 'mla_i64'
762 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
763 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64>
764 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb
765 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
766 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
767 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64>
768 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb
769 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
770 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
771 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64>
772 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb
773 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
774 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
775 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64>
776 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb
777 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
778 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
779 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64>
780 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb
781 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
782 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
783 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64>
784 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb
785 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
786 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
787 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64>
788 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb
789 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
790 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
791 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64>
792 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb
793 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
794 ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
795 ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64>
796 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb
797 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
798 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
799 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64>
800 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb
801 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
802 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
803 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64>
804 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb
805 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
806 ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
807 ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64>
808 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb
809 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
810 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
811 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64>
812 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb
813 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
814 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
815 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64>
816 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb
817 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
818 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
819 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64>
820 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb
821 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
822 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
823 ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64>
824 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb
825 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
826 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
827 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64>
828 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb
829 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
830 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
831 ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64>
832 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb
833 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
834 ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
835 ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64>
836 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb
837 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
838 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
839 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64>
840 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb
841 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
842 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
843 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64>
844 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb
845 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
846 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
847 ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64>
848 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb
849 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
850 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
851 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64>
852 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb
853 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
854 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
855 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64>
856 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb
857 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
858 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
859 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64>
860 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb
861 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
862 ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
863 ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64>
864 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb
865 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
866 ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
867 ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64>
868 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb
869 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
870 ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
871 ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64>
872 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb
873 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
874 ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
875 ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64>
876 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb
877 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
878 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
879 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64>
880 ; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb
881 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
882 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef
883 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
884 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a16m = mul <2 x i64> undef, undef
885 ; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
886 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a17m = mul <4 x i64> undef, undef
887 ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
888 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef
889 ; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
890 ; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %a19m = mul <16 x i64> undef, undef
891 ; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
892 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
894 %a0za = zext <1 x i8> undef to <1 x i64>
895 %a0zb = zext <1 x i8> undef to <1 x i64>
896 %a0zm = mul <1 x i64> %a0za, %a0zb
897 %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
899 %a0sa = sext <1 x i8> undef to <1 x i64>
900 %a0sb = sext <1 x i8> undef to <1 x i64>
901 %a0sm = mul <1 x i64> %a0sa, %a0sb
902 %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
904 %a1za = zext <2 x i8> undef to <2 x i64>
905 %a1zb = zext <2 x i8> undef to <2 x i64>
906 %a1zm = mul <2 x i64> %a1za, %a1zb
907 %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
909 %a1sa = sext <2 x i8> undef to <2 x i64>
910 %a1sb = sext <2 x i8> undef to <2 x i64>
911 %a1sm = mul <2 x i64> %a1sa, %a1sb
912 %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
914 %a2za = zext <4 x i8> undef to <4 x i64>
915 %a2zb = zext <4 x i8> undef to <4 x i64>
916 %a2zm = mul <4 x i64> %a2za, %a2zb
917 %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
919 %a2sa = sext <4 x i8> undef to <4 x i64>
920 %a2sb = sext <4 x i8> undef to <4 x i64>
921 %a2sm = mul <4 x i64> %a2sa, %a2sb
922 %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
924 %a3za = zext <8 x i8> undef to <8 x i64>
925 %a3zb = zext <8 x i8> undef to <8 x i64>
926 %a3zm = mul <8 x i64> %a3za, %a3zb
927 %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
929 %a3sa = sext <8 x i8> undef to <8 x i64>
930 %a3sb = sext <8 x i8> undef to <8 x i64>
931 %a3sm = mul <8 x i64> %a3sa, %a3sb
932 %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
934 %a4za = zext <16 x i8> undef to <16 x i64>
935 %a4zb = zext <16 x i8> undef to <16 x i64>
936 %a4zm = mul <16 x i64> %a4za, %a4zb
937 %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
939 %a4sa = sext <16 x i8> undef to <16 x i64>
940 %a4sb = sext <16 x i8> undef to <16 x i64>
941 %a4sm = mul <16 x i64> %a4sa, %a4sb
942 %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
944 %a5za = zext <1 x i16> undef to <1 x i64>
945 %a5zb = zext <1 x i16> undef to <1 x i64>
946 %a5zm = mul <1 x i64> %a5za, %a5zb
947 %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
949 %a5sa = sext <1 x i16> undef to <1 x i64>
950 %a5sb = sext <1 x i16> undef to <1 x i64>
951 %a5sm = mul <1 x i64> %a5sa, %a5sb
952 %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
954 %a6za = zext <2 x i16> undef to <2 x i64>
955 %a6zb = zext <2 x i16> undef to <2 x i64>
956 %a6zm = mul <2 x i64> %a6za, %a6zb
957 %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
959 %a6sa = sext <2 x i16> undef to <2 x i64>
960 %a6sb = sext <2 x i16> undef to <2 x i64>
961 %a6sm = mul <2 x i64> %a6sa, %a6sb
962 %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
964 %a7za = zext <4 x i16> undef to <4 x i64>
965 %a7zb = zext <4 x i16> undef to <4 x i64>
966 %a7zm = mul <4 x i64> %a7za, %a7zb
967 %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
969 %a7sa = sext <4 x i16> undef to <4 x i64>
970 %a7sb = sext <4 x i16> undef to <4 x i64>
971 %a7sm = mul <4 x i64> %a7sa, %a7sb
972 %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
974 %a8za = zext <8 x i16> undef to <8 x i64>
975 %a8zb = zext <8 x i16> undef to <8 x i64>
976 %a8zm = mul <8 x i64> %a8za, %a8zb
977 %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
979 %a8sa = sext <8 x i16> undef to <8 x i64>
980 %a8sb = sext <8 x i16> undef to <8 x i64>
981 %a8sm = mul <8 x i64> %a8sa, %a8sb
982 %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
984 %a9za = zext <16 x i16> undef to <16 x i64>
985 %a9zb = zext <16 x i16> undef to <16 x i64>
986 %a9zm = mul <16 x i64> %a9za, %a9zb
987 %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
989 %a9sa = sext <16 x i16> undef to <16 x i64>
990 %a9sb = sext <16 x i16> undef to <16 x i64>
991 %a9sm = mul <16 x i64> %a9sa, %a9sb
992 %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
994 %a10za = zext <1 x i32> undef to <1 x i64>
995 %a10zb = zext <1 x i32> undef to <1 x i64>
996 %a10zm = mul <1 x i64> %a10za, %a10zb
997 %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
999 %a10sa = sext <1 x i32> undef to <1 x i64>
1000 %a10sb = sext <1 x i32> undef to <1 x i64>
1001 %a10sm = mul <1 x i64> %a10sa, %a10sb
1002 %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
1004 %a11za = zext <2 x i32> undef to <2 x i64>
1005 %a11zb = zext <2 x i32> undef to <2 x i64>
1006 %a11zm = mul <2 x i64> %a11za, %a11zb
1007 %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
1009 %a11sa = sext <2 x i32> undef to <2 x i64>
1010 %a11sb = sext <2 x i32> undef to <2 x i64>
1011 %a11sm = mul <2 x i64> %a11sa, %a11sb
1012 %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
1014 %a12za = zext <4 x i32> undef to <4 x i64>
1015 %a12zb = zext <4 x i32> undef to <4 x i64>
1016 %a12zm = mul <4 x i64> %a12za, %a12zb
1017 %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
1019 %a12sa = sext <4 x i32> undef to <4 x i64>
1020 %a12sb = sext <4 x i32> undef to <4 x i64>
1021 %a12sm = mul <4 x i64> %a12sa, %a12sb
1022 %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
1024 %a13za = zext <8 x i32> undef to <8 x i64>
1025 %a13zb = zext <8 x i32> undef to <8 x i64>
1026 %a13zm = mul <8 x i64> %a13za, %a13zb
1027 %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
1029 %a13sa = sext <8 x i32> undef to <8 x i64>
1030 %a13sb = sext <8 x i32> undef to <8 x i64>
1031 %a13sm = mul <8 x i64> %a13sa, %a13sb
1032 %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
1034 %a14za = zext <16 x i32> undef to <16 x i64>
1035 %a14zb = zext <16 x i32> undef to <16 x i64>
1036 %a14zm = mul <16 x i64> %a14za, %a14zb
1037 %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
1039 %a14sa = sext <16 x i32> undef to <16 x i64>
1040 %a14sb = sext <16 x i32> undef to <16 x i64>
1041 %a14sm = mul <16 x i64> %a14sa, %a14sb
1042 %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
1044 %a15m = mul <1 x i64> undef, undef
1045 %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
1047 %a16m = mul <2 x i64> undef, undef
1048 %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
1050 %a17m = mul <4 x i64> undef, undef
1051 %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
1053 %a18m = mul <8 x i64> undef, undef
1054 %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
1056 %a19m = mul <16 x i64> undef, undef
1057 %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
; Declarations of the overloaded @llvm.vector.reduce.add intrinsic.
; There is one declaration for every combination of element type (i8, i16,
; i32, i64) and vector length (1, 2, 4, 8, 16); each takes a single vector
; operand and returns a scalar of that vector's element type.
; The list is in plain textual order of the declaration lines, which is why
; the 16-element variant precedes the 1-element one within each group and the
; i8 group comes last.
1062 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
1063 declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
1064 declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
1065 declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
1066 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
1067 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
1068 declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
1069 declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
1070 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
1071 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
1072 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
1073 declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
1074 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
1075 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
1076 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
1077 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
1078 declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
1079 declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
1080 declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
1081 declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)