1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FP %s
; Element-wise integer add of two <16 x i8> vectors.
; The checks use the default prefix, which both RUN lines enable, so the same
; sequence is expected with +mve and with +mve.fp: one operand is assembled
; into q0 (d0/d1) from r0-r3, the other is loaded into q1 from the address held
; in sp, a single vadd.i8 combines them, and q0 is moved back out to r0-r3.
; NOTE(review): presumably %lhs is the register operand and %rhs the stack
; operand per the calling convention; the add is commutative, so the checks
; alone do not show which is which -- confirm.
; NOTE(review): no check for the return instruction is visible here, unlike
; the f16/f32 tests in this file -- confirm against regenerated output.
5 define <16 x i8> @vector_add_i8(<16 x i8> %lhs, <16 x i8> %rhs) {
6 ; CHECK-LABEL: vector_add_i8:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vmov d1, r2, r3
9 ; CHECK-NEXT: vmov d0, r0, r1
10 ; CHECK-NEXT: mov r0, sp
11 ; CHECK-NEXT: vldrw.u32 q1, [r0]
12 ; CHECK-NEXT: vadd.i8 q0, q0, q1
13 ; CHECK-NEXT: vmov r0, r1, d0
14 ; CHECK-NEXT: vmov r2, r3, d1
17 %sum = add <16 x i8> %lhs, %rhs
; Element-wise integer add of two <8 x i16> vectors.
; The checks use the default prefix, which both RUN lines enable, so the same
; sequence is expected with +mve and with +mve.fp. It has the same shape as the
; i8 test: build q0 from r0-r3, load q1 from the address held in sp, add, and
; move q0 back out to r0-r3. Only the element size of the add differs
; (vadd.i16); the load is still checked as vldrw.u32.
; NOTE(review): no check for the return instruction is visible here, unlike
; the f16/f32 tests in this file -- confirm against regenerated output.
21 define <8 x i16> @vector_add_i16(<8 x i16> %lhs, <8 x i16> %rhs) {
22 ; CHECK-LABEL: vector_add_i16:
23 ; CHECK: @ %bb.0: @ %entry
24 ; CHECK-NEXT: vmov d1, r2, r3
25 ; CHECK-NEXT: vmov d0, r0, r1
26 ; CHECK-NEXT: mov r0, sp
27 ; CHECK-NEXT: vldrw.u32 q1, [r0]
28 ; CHECK-NEXT: vadd.i16 q0, q0, q1
29 ; CHECK-NEXT: vmov r0, r1, d0
30 ; CHECK-NEXT: vmov r2, r3, d1
33 %sum = add <8 x i16> %lhs, %rhs
; Element-wise integer add of two <4 x i32> vectors.
; The checks use the default prefix, which both RUN lines enable, so the same
; sequence is expected with +mve and with +mve.fp: build q0 from r0-r3, load
; q1 from the address held in sp, combine them with a single vadd.i32, and
; move q0 back out to r0-r3.
; NOTE(review): no check for the return instruction is visible here, unlike
; the f16/f32 tests in this file -- confirm against regenerated output.
37 define <4 x i32> @vector_add_i32(<4 x i32> %lhs, <4 x i32> %rhs) {
38 ; CHECK-LABEL: vector_add_i32:
39 ; CHECK: @ %bb.0: @ %entry
40 ; CHECK-NEXT: vmov d1, r2, r3
41 ; CHECK-NEXT: vmov d0, r0, r1
42 ; CHECK-NEXT: mov r0, sp
43 ; CHECK-NEXT: vldrw.u32 q1, [r0]
44 ; CHECK-NEXT: vadd.i32 q0, q0, q1
45 ; CHECK-NEXT: vmov r0, r1, d0
46 ; CHECK-NEXT: vmov r2, r3, d1
49 %sum = add <4 x i32> %lhs, %rhs
; Element-wise integer add of two <2 x i64> vectors.
; Only the FP-specific prefix is used below, so this function is verified for
; the +mve.fp RUN line only; the +mve run has no assertions for it.
; The expected code contains no vector add. The 32-bit words of each lane are
; copied to core registers (vmov rN, sM), each 64-bit lane is summed with an
; adds/adc pair so the carry flows from the low word into the high word, and
; the four result words are inserted back into q0 with vmov.32 before q0 is
; moved out to r0-r3.
; The upper lane's result is held in lr/r12 while the lower lane is computed,
; and {r7, lr} is saved/restored around the body. The second operand is loaded
; from sp+8 -- presumably the original sp offset by the 8 bytes just pushed.
53 define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
54 ; CHECK-FP-LABEL: vector_add_i64:
55 ; CHECK-FP: @ %bb.0: @ %entry
56 ; CHECK-FP-NEXT: .save {r7, lr}
57 ; CHECK-FP-NEXT: push {r7, lr}
58 ; CHECK-FP-NEXT: vmov d1, r2, r3
59 ; CHECK-FP-NEXT: vmov d0, r0, r1
60 ; CHECK-FP-NEXT: add r0, sp, #8
61 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
62 ; CHECK-FP-NEXT: vmov r1, s2
63 ; CHECK-FP-NEXT: vmov r0, s3
64 ; CHECK-FP-NEXT: vmov r3, s6
65 ; CHECK-FP-NEXT: vmov r2, s7
66 ; CHECK-FP-NEXT: adds.w lr, r1, r3
67 ; CHECK-FP-NEXT: vmov r3, s0
68 ; CHECK-FP-NEXT: vmov r1, s4
69 ; CHECK-FP-NEXT: adc.w r12, r0, r2
70 ; CHECK-FP-NEXT: vmov r2, s1
71 ; CHECK-FP-NEXT: vmov r0, s5
72 ; CHECK-FP-NEXT: adds r1, r1, r3
73 ; CHECK-FP-NEXT: vmov.32 q0[0], r1
74 ; CHECK-FP-NEXT: adcs r0, r2
75 ; CHECK-FP-NEXT: vmov.32 q0[1], r0
76 ; CHECK-FP-NEXT: vmov.32 q0[2], lr
77 ; CHECK-FP-NEXT: vmov.32 q0[3], r12
78 ; CHECK-FP-NEXT: vmov r0, r1, d0
79 ; CHECK-FP-NEXT: vmov r2, r3, d1
80 ; CHECK-FP-NEXT: pop {r7, pc}
82 %sum = add <2 x i64> %lhs, %rhs
; Element-wise floating-point add of two <8 x half> vectors.
; Only the FP-specific prefix is used below, so this function is verified for
; the +mve.fp RUN line only; the +mve run has no assertions for it.
; The expected code has the same shape as the integer tests: build q0 from
; r0-r3, load q1 from the address held in sp, combine them with a single
; vadd.f16, move q0 back out to r0-r3, and return with bx lr.
86 define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
87 ; CHECK-FP-LABEL: vector_add_f16:
88 ; CHECK-FP: @ %bb.0: @ %entry
89 ; CHECK-FP-NEXT: vmov d1, r2, r3
90 ; CHECK-FP-NEXT: vmov d0, r0, r1
91 ; CHECK-FP-NEXT: mov r0, sp
92 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
93 ; CHECK-FP-NEXT: vadd.f16 q0, q0, q1
94 ; CHECK-FP-NEXT: vmov r0, r1, d0
95 ; CHECK-FP-NEXT: vmov r2, r3, d1
96 ; CHECK-FP-NEXT: bx lr
98 %sum = fadd <8 x half> %lhs, %rhs
; Element-wise floating-point add of two <4 x float> vectors.
; Only the FP-specific prefix is used below, so this function is verified for
; the +mve.fp RUN line only; the +mve run has no assertions for it.
; The expected code builds q0 from r0-r3, loads q1 from the address held in
; sp, combines them with a single vadd.f32, moves q0 back out to r0-r3, and
; returns with bx lr.
102 define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
103 ; CHECK-FP-LABEL: vector_add_f32:
104 ; CHECK-FP: @ %bb.0: @ %entry
105 ; CHECK-FP-NEXT: vmov d1, r2, r3
106 ; CHECK-FP-NEXT: vmov d0, r0, r1
107 ; CHECK-FP-NEXT: mov r0, sp
108 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
109 ; CHECK-FP-NEXT: vadd.f32 q0, q0, q1
110 ; CHECK-FP-NEXT: vmov r0, r1, d0
111 ; CHECK-FP-NEXT: vmov r2, r3, d1
112 ; CHECK-FP-NEXT: bx lr
114 %sum = fadd <4 x float> %lhs, %rhs
; Element-wise floating-point add of two <2 x double> vectors.
; Only the FP-specific prefix is used below, so this function is verified for
; the +mve.fp RUN line only; the +mve run has no assertions for it.
; The expected code contains no FP add instruction: it calls __aeabi_dadd
; twice, once per lane. The incoming r0-r3 values are first copied into r4-r7
; and the other operand is loaded into q4 (d8/d9), presumably so that both
; survive the calls; r4-r7 and d8/d9 are saved and restored around the body.
; The first call adds the values from r2:r3 and d9 and stores its r0:r1 result
; in d9; the second adds the values from r0:r1 and d8 and stores its result in
; d8. d8/d9 are then moved out to r0-r3.
; The load address sp+40 matches the bytes pushed before it: 20 for the five
; core registers, 4 for the pad, and 16 for d8/d9.
118 define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
119 ; CHECK-FP-LABEL: vector_add_f64:
120 ; CHECK-FP: @ %bb.0: @ %entry
121 ; CHECK-FP-NEXT: .save {r4, r5, r6, r7, lr}
122 ; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr}
123 ; CHECK-FP-NEXT: .pad #4
124 ; CHECK-FP-NEXT: sub sp, #4
125 ; CHECK-FP-NEXT: .vsave {d8, d9}
126 ; CHECK-FP-NEXT: vpush {d8, d9}
127 ; CHECK-FP-NEXT: mov r5, r0
128 ; CHECK-FP-NEXT: add r0, sp, #40
129 ; CHECK-FP-NEXT: vldrw.u32 q4, [r0]
130 ; CHECK-FP-NEXT: mov r4, r2
131 ; CHECK-FP-NEXT: mov r6, r3
132 ; CHECK-FP-NEXT: mov r7, r1
133 ; CHECK-FP-NEXT: vmov r2, r3, d9
134 ; CHECK-FP-NEXT: mov r0, r4
135 ; CHECK-FP-NEXT: mov r1, r6
136 ; CHECK-FP-NEXT: bl __aeabi_dadd
137 ; CHECK-FP-NEXT: vmov r2, r3, d8
138 ; CHECK-FP-NEXT: vmov d9, r0, r1
139 ; CHECK-FP-NEXT: mov r0, r5
140 ; CHECK-FP-NEXT: mov r1, r7
141 ; CHECK-FP-NEXT: bl __aeabi_dadd
142 ; CHECK-FP-NEXT: vmov d8, r0, r1
143 ; CHECK-FP-NEXT: vmov r2, r3, d9
144 ; CHECK-FP-NEXT: vmov r0, r1, d8
145 ; CHECK-FP-NEXT: vpop {d8, d9}
146 ; CHECK-FP-NEXT: add sp, #4
147 ; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc}
149 %sum = fadd <2 x double> %lhs, %rhs
150 ret <2 x double> %sum