; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
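
; Check lowering of the llvm.experimental.vector.reduce.add intrinsics to the
; MVE VADDV (add across vector) and VADDVA (add across vector and accumulate)
; instructions.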

declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
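
; There is no 64-bit element form of VADDV, so the v2i64 reduction is expanded
; to scalar adds with carry.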
define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) {
; CHECK-LABEL: vaddv_v2i64_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    adds r0, r0, r3
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
  ret i64 %r
}
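
; The i32, i16 and i8 reductions each select a single VADDV of the matching
; element size.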
define arm_aapcs_vfpcc i32 @vaddv_v4i32_i32(<4 x i32> %s1) {
; CHECK-LABEL: vaddv_v4i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @vaddv_v8i16_i16(<8 x i16> %s1) {
; CHECK-LABEL: vaddv_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @vaddv_v16i8_i8(<16 x i8> %s1) {
; CHECK-LABEL: vaddv_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
  ret i8 %r
}
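
; The 64-bit reduce-and-accumulate likewise has no single instruction: the
; reduction is expanded and then added into the incoming i64 accumulator in
; r1:r0.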
define arm_aapcs_vfpcc i64 @vaddva_v2i64_i64(<2 x i64> %s1, i64 %x) {
; CHECK-LABEL: vaddva_v2i64_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r12, s3
; CHECK-NEXT:    vmov lr, s1
; CHECK-NEXT:    adds r2, r2, r3
; CHECK-NEXT:    adc.w r3, lr, r12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    pop {r7, pc}
entry:
  %t = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
  %r = add i64 %t, %x
  ret i64 %r
}
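
; Adding the reduction result to a scalar selects the accumulating VADDVA form,
; with the scalar argument already in the r0 return register.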
define arm_aapcs_vfpcc i32 @vaddva_v4i32_i32(<4 x i32> %s1, i32 %x) {
; CHECK-LABEL: vaddva_v4i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
  %r = add i32 %t, %x
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @vaddva_v8i16_i16(<8 x i16> %s1, i16 %x) {
; CHECK-LABEL: vaddva_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
  %r = add i16 %t, %x
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @vaddva_v16i8_i8(<16 x i8> %s1, i8 %x) {
; CHECK-LABEL: vaddva_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
  %r = add i8 %t, %x
  ret i8 %r
}