; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
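
; Checks that the AArch64 NEON across-vector reduction intrinsics (add-long,
; add, max and min, plus their floating-point variants) are lowered to the
; corresponding SADDLV/UADDLV, ADDV, SMAXV/UMAXV, SMINV/UMINV, FMAXV/FMINV
; and FMAXNMV/FMINNMV instructions.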

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)
declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)
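
; Widening add across vector: SADDLV/UADDLV sum every lane into a scalar
; that is twice the lane width.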
define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %saddlvv.i
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %uaddlvv.i
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %saddlvv.i
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %saddlvv.i
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %uaddlvv.i
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %uaddlvv.i
}
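
; Integer maximum across vector: SMAXV/UMAXV.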
define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %smaxv.i
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %umaxv.i
}
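
; Integer minimum across vector: SMINV/UMINV.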
define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
  ret i32 %sminv.i
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
  ret i32 %uminv.i
}
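
; Add across vector: ADDV is sign-agnostic, so the saddv intrinsic is used for
; the unsigned tests as well.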
define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}
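
; Floating-point maximum/minimum across vector, including the
; FMAXNMV/FMINNMV (IEEE maxNum/minNum) forms.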
define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}