; Vector-reduction tests for the aarch64--linux-gnu triple. Every test case in
; this file is verified twice:
;   * opt's cost-model analysis output is matched with the COST check prefix;
;   * llc's output is matched with the CODE check prefix.
1 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
; add reduction of <8 x i8> to i8: expects a cost-model estimate of 1 for the
; intrinsic call and `addv b0, v0.8b` in the llc output (the codegen check
; mirrors the pattern used by the other add reductions in this file).
4 ; COST-LABEL: add.i8.v8i8
5 ; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v)
6 ; CODE-LABEL: add.i8.v8i8
; CODE: addv b0, v0.8b
8 define i8 @add.i8.v8i8(<8 x i8> %v) {
9 %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v)
; add reduction of <16 x i8> to i8: expects a cost-model estimate of 1 for the
; intrinsic call and `addv b0, v0.16b` in the llc output.
13 ; COST-LABEL: add.i8.v16i8
14 ; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v)
15 ; CODE-LABEL: add.i8.v16i8
16 ; CODE: addv b0, v0.16b
17 define i8 @add.i8.v16i8(<16 x i8> %v) {
18 %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v)
; add reduction of <4 x i16> to i16: expects a cost-model estimate of 1 for the
; intrinsic call and `addv h0, v0.4h` in the llc output.
22 ; COST-LABEL: add.i16.v4i16
23 ; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v)
24 ; CODE-LABEL: add.i16.v4i16
25 ; CODE: addv h0, v0.4h
26 define i16 @add.i16.v4i16(<4 x i16> %v) {
27 %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v)
; add reduction of <8 x i16> to i16: expects a cost-model estimate of 1 for the
; intrinsic call and `addv h0, v0.8h` in the llc output.
31 ; COST-LABEL: add.i16.v8i16
32 ; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v)
33 ; CODE-LABEL: add.i16.v8i16
34 ; CODE: addv h0, v0.8h
35 define i16 @add.i16.v8i16(<8 x i16> %v) {
36 %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v)
; add reduction of <4 x i32> to i32: expects a cost-model estimate of 1 for the
; intrinsic call and `addv s0, v0.4s` in the llc output.
40 ; COST-LABEL: add.i32.v4i32
41 ; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
42 ; CODE-LABEL: add.i32.v4i32
43 ; CODE: addv s0, v0.4s
44 define i32 @add.i32.v4i32(<4 x i32> %v) {
45 %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
; umin reduction of <8 x i8> to i8: expects a cost-model estimate of 216 for
; the intrinsic call and `uminv b0, v0.8b` in the llc output.
49 ; COST-LABEL: umin.i8.v8i8
50 ; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v)
51 ; CODE-LABEL: umin.i8.v8i8
52 ; CODE: uminv b0, v0.8b
53 define i8 @umin.i8.v8i8(<8 x i8> %v) {
54 %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v)
; umin reduction of <16 x i8> to i8: expects a cost-model estimate of 608 for
; the intrinsic call and `uminv b0, v0.16b` in the llc output.
58 ; COST-LABEL: umin.i8.v16i8
59 ; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v)
60 ; CODE-LABEL: umin.i8.v16i8
61 ; CODE: uminv b0, v0.16b
62 define i8 @umin.i8.v16i8(<16 x i8> %v) {
63 %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v)
; umin reduction of <4 x i16> to i16: expects a cost-model estimate of 64 for
; the intrinsic call and `uminv h0, v0.4h` in the llc output.
67 ; COST-LABEL: umin.i16.v4i16
68 ; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v)
69 ; CODE-LABEL: umin.i16.v4i16
70 ; CODE: uminv h0, v0.4h
71 define i16 @umin.i16.v4i16(<4 x i16> %v) {
72 %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v)
; umin reduction of <8 x i16> to i16: expects a cost-model estimate of 216 for
; the intrinsic call and `uminv h0, v0.8h` in the llc output.
76 ; COST-LABEL: umin.i16.v8i16
77 ; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v)
78 ; CODE-LABEL: umin.i16.v8i16
79 ; CODE: uminv h0, v0.8h
80 define i16 @umin.i16.v8i16(<8 x i16> %v) {
81 %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v)
; umin reduction of <4 x i32> to i32: expects a cost-model estimate of 34 for
; the intrinsic call and `uminv s0, v0.4s` in the llc output.
85 ; COST-LABEL: umin.i32.v4i32
86 ; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v)
87 ; CODE-LABEL: umin.i32.v4i32
88 ; CODE: uminv s0, v0.4s
89 define i32 @umin.i32.v4i32(<4 x i32> %v) {
90 %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v)
; umax reduction of <8 x i8> to i8: expects a cost-model estimate of 216 for
; the intrinsic call and `umaxv b0, v0.8b` in the llc output.
94 ; COST-LABEL: umax.i8.v8i8
95 ; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v)
96 ; CODE-LABEL: umax.i8.v8i8
97 ; CODE: umaxv b0, v0.8b
98 define i8 @umax.i8.v8i8(<8 x i8> %v) {
99 %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v)
; umax reduction of <16 x i8> to i8: expects a cost-model estimate of 608 for
; the intrinsic call and `umaxv b0, v0.16b` in the llc output.
103 ; COST-LABEL: umax.i8.v16i8
104 ; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v)
105 ; CODE-LABEL: umax.i8.v16i8
106 ; CODE: umaxv b0, v0.16b
107 define i8 @umax.i8.v16i8(<16 x i8> %v) {
108 %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v)
; umax reduction of <4 x i16> to i16: expects a cost-model estimate of 64 for
; the intrinsic call and `umaxv h0, v0.4h` in the llc output.
112 ; COST-LABEL: umax.i16.v4i16
113 ; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v)
114 ; CODE-LABEL: umax.i16.v4i16
115 ; CODE: umaxv h0, v0.4h
116 define i16 @umax.i16.v4i16(<4 x i16> %v) {
117 %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v)
; umax reduction of <8 x i16> to i16: expects a cost-model estimate of 216 for
; the intrinsic call and `umaxv h0, v0.8h` in the llc output.
121 ; COST-LABEL: umax.i16.v8i16
122 ; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v)
123 ; CODE-LABEL: umax.i16.v8i16
124 ; CODE: umaxv h0, v0.8h
125 define i16 @umax.i16.v8i16(<8 x i16> %v) {
126 %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v)
; umax reduction of <4 x i32> to i32: expects a cost-model estimate of 34 for
; the intrinsic call and `umaxv s0, v0.4s` in the llc output.
130 ; COST-LABEL: umax.i32.v4i32
131 ; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v)
132 ; CODE-LABEL: umax.i32.v4i32
133 ; CODE: umaxv s0, v0.4s
134 define i32 @umax.i32.v4i32(<4 x i32> %v) {
135 %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v)
; smin reduction of <8 x i8> to i8: expects a cost-model estimate of 216 for
; the intrinsic call and `sminv b0, v0.8b` in the llc output.
139 ; COST-LABEL: smin.i8.v8i8
140 ; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v)
141 ; CODE-LABEL: smin.i8.v8i8
142 ; CODE: sminv b0, v0.8b
143 define i8 @smin.i8.v8i8(<8 x i8> %v) {
144 %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v)
; smin reduction of <16 x i8> to i8: expects a cost-model estimate of 608 for
; the intrinsic call and `sminv b0, v0.16b` in the llc output.
148 ; COST-LABEL: smin.i8.v16i8
149 ; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v)
150 ; CODE-LABEL: smin.i8.v16i8
151 ; CODE: sminv b0, v0.16b
152 define i8 @smin.i8.v16i8(<16 x i8> %v) {
153 %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v)
; smin reduction of <4 x i16> to i16: expects a cost-model estimate of 64 for
; the intrinsic call and `sminv h0, v0.4h` in the llc output.
157 ; COST-LABEL: smin.i16.v4i16
158 ; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v)
159 ; CODE-LABEL: smin.i16.v4i16
160 ; CODE: sminv h0, v0.4h
161 define i16 @smin.i16.v4i16(<4 x i16> %v) {
162 %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v)
; smin reduction of <8 x i16> to i16: expects a cost-model estimate of 216 for
; the intrinsic call and `sminv h0, v0.8h` in the llc output.
166 ; COST-LABEL: smin.i16.v8i16
167 ; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v)
168 ; CODE-LABEL: smin.i16.v8i16
169 ; CODE: sminv h0, v0.8h
170 define i16 @smin.i16.v8i16(<8 x i16> %v) {
171 %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v)
; smin reduction of <4 x i32> to i32: expects a cost-model estimate of 34 for
; the intrinsic call and `sminv s0, v0.4s` in the llc output.
175 ; COST-LABEL: smin.i32.v4i32
176 ; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v)
177 ; CODE-LABEL: smin.i32.v4i32
178 ; CODE: sminv s0, v0.4s
179 define i32 @smin.i32.v4i32(<4 x i32> %v) {
180 %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v)
; smax reduction of <8 x i8> to i8: expects a cost-model estimate of 216 for
; the intrinsic call and `smaxv b0, v0.8b` in the llc output.
184 ; COST-LABEL: smax.i8.v8i8
185 ; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v)
186 ; CODE-LABEL: smax.i8.v8i8
187 ; CODE: smaxv b0, v0.8b
188 define i8 @smax.i8.v8i8(<8 x i8> %v) {
189 %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v)
; smax reduction of <16 x i8> to i8: expects a cost-model estimate of 608 for
; the intrinsic call and `smaxv b0, v0.16b` in the llc output.
193 ; COST-LABEL: smax.i8.v16i8
194 ; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v)
195 ; CODE-LABEL: smax.i8.v16i8
196 ; CODE: smaxv b0, v0.16b
197 define i8 @smax.i8.v16i8(<16 x i8> %v) {
198 %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v)
; smax reduction of <4 x i16> to i16: expects a cost-model estimate of 64 for
; the intrinsic call and `smaxv h0, v0.4h` in the llc output.
202 ; COST-LABEL: smax.i16.v4i16
203 ; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v)
204 ; CODE-LABEL: smax.i16.v4i16
205 ; CODE: smaxv h0, v0.4h
206 define i16 @smax.i16.v4i16(<4 x i16> %v) {
207 %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v)
; smax reduction of <8 x i16> to i16: expects a cost-model estimate of 216 for
; the intrinsic call and `smaxv h0, v0.8h` in the llc output.
211 ; COST-LABEL: smax.i16.v8i16
212 ; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v)
213 ; CODE-LABEL: smax.i16.v8i16
214 ; CODE: smaxv h0, v0.8h
215 define i16 @smax.i16.v8i16(<8 x i16> %v) {
216 %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v)
; smax reduction of <4 x i32> to i32: expects a cost-model estimate of 34 for
; the intrinsic call and `smaxv s0, v0.4s` in the llc output.
220 ; COST-LABEL: smax.i32.v4i32
221 ; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v)
222 ; CODE-LABEL: smax.i32.v4i32
223 ; CODE: smaxv s0, v0.4s
224 define i32 @smax.i32.v4i32(<4 x i32> %v) {
225 %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v)
; fmin reduction of <4 x float> to float, with the nnan flag on the call:
; expects a cost-model estimate of 34 for the intrinsic call and
; `fminnmv s0, v0.4s` in the llc output.
229 ; COST-LABEL: fmin.f32.v4f32
230 ; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v)
231 ; CODE-LABEL: fmin.f32.v4f32
232 ; CODE: fminnmv s0, v0.4s
233 define float @fmin.f32.v4f32(<4 x float> %v) {
234 %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v)
; fmax reduction of <4 x float> to float, with the nnan flag on the call:
; expects a cost-model estimate of 34 for the intrinsic call and
; `fmaxnmv s0, v0.4s` in the llc output.
238 ; COST-LABEL: fmax.f32.v4f32
239 ; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v)
240 ; CODE-LABEL: fmax.f32.v4f32
241 ; CODE: fmaxnmv s0, v0.4s
242 define float @fmax.f32.v4f32(<4 x float> %v) {
243 %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v)
; Declarations of the reduction intrinsics called by the test functions in
; this file, grouped by operation. Each takes one vector operand and yields a
; scalar of the vector's element type.

; add reductions
247 declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
248 declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
249 declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
250 declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
251 declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
; umin reductions
253 declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
254 declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
255 declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
256 declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
257 declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
; umax reductions
259 declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
260 declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
261 declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
262 declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
263 declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
; smin reductions
265 declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
266 declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
267 declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
268 declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
269 declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
; smax reductions
271 declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
272 declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
273 declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
274 declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
275 declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
; fmin / fmax reductions
277 declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
279 declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)