1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-MVE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefix=CHECK-BE
4 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-FP
; Element-wise integer add of two <16 x i8> vectors.
; Little-endian runs: the checks expect q0 to be built from r0-r3 and q1 to be
; loaded from [sp] (presumably %lhs and %rhs respectively), followed by a single
; vadd.i8 and a move of the result back into r0-r3.
; Big-endian run: the checks expect the same shape, with a vrev64.8 applied to
; the register-built operand and to the result, and a vldrb.u8 for the stack load.
6 define <16 x i8> @vector_add_i8(<16 x i8> %lhs, <16 x i8> %rhs) {
7 ; CHECK-LE-LABEL: vector_add_i8:
8 ; CHECK-LE: @ %bb.0: @ %entry
9 ; CHECK-LE-NEXT: vmov d0, r0, r1
10 ; CHECK-LE-NEXT: mov r0, sp
11 ; CHECK-LE-NEXT: vldrw.u32 q1, [r0]
12 ; CHECK-LE-NEXT: vmov d1, r2, r3
13 ; CHECK-LE-NEXT: vadd.i8 q0, q0, q1
14 ; CHECK-LE-NEXT: vmov r0, r1, d0
15 ; CHECK-LE-NEXT: vmov r2, r3, d1
16 ; CHECK-LE-NEXT: bx lr
18 ; CHECK-BE-LABEL: vector_add_i8:
19 ; CHECK-BE: @ %bb.0: @ %entry
20 ; CHECK-BE-NEXT: vmov d0, r1, r0
21 ; CHECK-BE-NEXT: mov r0, sp
22 ; CHECK-BE-NEXT: vmov d1, r3, r2
23 ; CHECK-BE-NEXT: vrev64.8 q1, q0
24 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
25 ; CHECK-BE-NEXT: vadd.i8 q0, q1, q0
26 ; CHECK-BE-NEXT: vrev64.8 q1, q0
27 ; CHECK-BE-NEXT: vmov r1, r0, d2
28 ; CHECK-BE-NEXT: vmov r3, r2, d3
29 ; CHECK-BE-NEXT: bx lr
31 %sum = add <16 x i8> %lhs, %rhs
; Element-wise integer add of two <8 x i16> vectors.
; Little-endian runs: the checks expect q0 to be built from r0-r3 and q1 to be
; loaded from [sp] (presumably %lhs and %rhs respectively), followed by a single
; vadd.i16 and a move of the result back into r0-r3.
; Big-endian run: the checks expect the same shape, with a vrev64.16 applied to
; the register-built operand and to the result, and a vldrh.u16 for the stack load.
35 define <8 x i16> @vector_add_i16(<8 x i16> %lhs, <8 x i16> %rhs) {
36 ; CHECK-LE-LABEL: vector_add_i16:
37 ; CHECK-LE: @ %bb.0: @ %entry
38 ; CHECK-LE-NEXT: vmov d0, r0, r1
39 ; CHECK-LE-NEXT: mov r0, sp
40 ; CHECK-LE-NEXT: vldrw.u32 q1, [r0]
41 ; CHECK-LE-NEXT: vmov d1, r2, r3
42 ; CHECK-LE-NEXT: vadd.i16 q0, q0, q1
43 ; CHECK-LE-NEXT: vmov r0, r1, d0
44 ; CHECK-LE-NEXT: vmov r2, r3, d1
45 ; CHECK-LE-NEXT: bx lr
47 ; CHECK-BE-LABEL: vector_add_i16:
48 ; CHECK-BE: @ %bb.0: @ %entry
49 ; CHECK-BE-NEXT: vmov d0, r1, r0
50 ; CHECK-BE-NEXT: mov r0, sp
51 ; CHECK-BE-NEXT: vmov d1, r3, r2
52 ; CHECK-BE-NEXT: vrev64.16 q1, q0
53 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
54 ; CHECK-BE-NEXT: vadd.i16 q0, q1, q0
55 ; CHECK-BE-NEXT: vrev64.16 q1, q0
56 ; CHECK-BE-NEXT: vmov r1, r0, d2
57 ; CHECK-BE-NEXT: vmov r3, r2, d3
58 ; CHECK-BE-NEXT: bx lr
60 %sum = add <8 x i16> %lhs, %rhs
; Element-wise integer add of two <4 x i32> vectors.
; Little-endian runs: the checks expect q0 to be built from r0-r3 and q1 to be
; loaded from [sp] (presumably %lhs and %rhs respectively), followed by a single
; vadd.i32 and a move of the result back into r0-r3.
; Big-endian run: the checks expect the same shape, with a vrev64.32 applied to
; the register-built operand and to the result; the stack load stays vldrw.u32.
64 define <4 x i32> @vector_add_i32(<4 x i32> %lhs, <4 x i32> %rhs) {
65 ; CHECK-LE-LABEL: vector_add_i32:
66 ; CHECK-LE: @ %bb.0: @ %entry
67 ; CHECK-LE-NEXT: vmov d0, r0, r1
68 ; CHECK-LE-NEXT: mov r0, sp
69 ; CHECK-LE-NEXT: vldrw.u32 q1, [r0]
70 ; CHECK-LE-NEXT: vmov d1, r2, r3
71 ; CHECK-LE-NEXT: vadd.i32 q0, q0, q1
72 ; CHECK-LE-NEXT: vmov r0, r1, d0
73 ; CHECK-LE-NEXT: vmov r2, r3, d1
74 ; CHECK-LE-NEXT: bx lr
76 ; CHECK-BE-LABEL: vector_add_i32:
77 ; CHECK-BE: @ %bb.0: @ %entry
78 ; CHECK-BE-NEXT: vmov d0, r1, r0
79 ; CHECK-BE-NEXT: mov r0, sp
80 ; CHECK-BE-NEXT: vmov d1, r3, r2
81 ; CHECK-BE-NEXT: vrev64.32 q1, q0
82 ; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
83 ; CHECK-BE-NEXT: vadd.i32 q0, q1, q0
84 ; CHECK-BE-NEXT: vrev64.32 q1, q0
85 ; CHECK-BE-NEXT: vmov r1, r0, d2
86 ; CHECK-BE-NEXT: vmov r3, r2, d3
87 ; CHECK-BE-NEXT: bx lr
89 %sum = add <4 x i32> %lhs, %rhs
; Element-wise integer add of two <2 x i64> vectors.
; None of the three expected outputs contains a vector add: every run is checked
; for scalar add/add-with-carry pairs on core registers, one pair per 64-bit lane.
; The stack operand is loaded with vldrw.u32 and moved to core registers first.
; - +mve little-endian run: adds.w/adc.w on r0/r1 and r2/r3, using r12/lr as scratch.
; - big-endian run: same sequence, but the low/high register roles are swapped
;   (adds.w on r1 and r3, adc.w on r0 and r2).
; - +mve.fp run: additionally saves r4/r5 and uses them for the second lane, with
;   the narrow adds/adcs encodings for that pair.
93 define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
94 ; CHECK-MVE-LABEL: vector_add_i64:
95 ; CHECK-MVE: @ %bb.0: @ %entry
96 ; CHECK-MVE-NEXT: .save {r7, lr}
97 ; CHECK-MVE-NEXT: push {r7, lr}
98 ; CHECK-MVE-NEXT: add.w r12, sp, #8
99 ; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
100 ; CHECK-MVE-NEXT: vmov r12, lr, d0
101 ; CHECK-MVE-NEXT: adds.w r0, r0, r12
102 ; CHECK-MVE-NEXT: adc.w r1, r1, lr
103 ; CHECK-MVE-NEXT: vmov r12, lr, d1
104 ; CHECK-MVE-NEXT: adds.w r2, r2, r12
105 ; CHECK-MVE-NEXT: adc.w r3, r3, lr
106 ; CHECK-MVE-NEXT: pop {r7, pc}
108 ; CHECK-BE-LABEL: vector_add_i64:
109 ; CHECK-BE: @ %bb.0: @ %entry
110 ; CHECK-BE-NEXT: .save {r7, lr}
111 ; CHECK-BE-NEXT: push {r7, lr}
112 ; CHECK-BE-NEXT: add.w r12, sp, #8
113 ; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
114 ; CHECK-BE-NEXT: vmov r12, lr, d0
115 ; CHECK-BE-NEXT: adds.w r1, r1, lr
116 ; CHECK-BE-NEXT: adc.w r0, r0, r12
117 ; CHECK-BE-NEXT: vmov r12, lr, d1
118 ; CHECK-BE-NEXT: adds.w r3, r3, lr
119 ; CHECK-BE-NEXT: adc.w r2, r2, r12
120 ; CHECK-BE-NEXT: pop {r7, pc}
122 ; CHECK-FP-LABEL: vector_add_i64:
123 ; CHECK-FP: @ %bb.0: @ %entry
124 ; CHECK-FP-NEXT: .save {r4, r5, r7, lr}
125 ; CHECK-FP-NEXT: push {r4, r5, r7, lr}
126 ; CHECK-FP-NEXT: add.w r12, sp, #16
127 ; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
128 ; CHECK-FP-NEXT: vmov r12, lr, d0
129 ; CHECK-FP-NEXT: vmov r4, r5, d1
130 ; CHECK-FP-NEXT: adds.w r0, r0, r12
131 ; CHECK-FP-NEXT: adc.w r1, r1, lr
132 ; CHECK-FP-NEXT: adds r2, r2, r4
133 ; CHECK-FP-NEXT: adcs r3, r5
134 ; CHECK-FP-NEXT: pop {r4, r5, r7, pc}
136 %sum = add <2 x i64> %lhs, %rhs
; Element-wise floating-point add of two <8 x half> vectors.
; - +mve little-endian run: the checks expect the add to be expanded per lane.
;   For each of the 8 lanes, both inputs are extracted with vmov.u16, converted
;   with two __aeabi_h2f calls, added with __aeabi_fadd, converted back with
;   __aeabi_f2h, and inserted into the result with vmov.16. The working vectors
;   live in q4/q5/q6, which the prologue saves (d8-d13).
; - big-endian run (also +mve): same per-lane expansion, with a vrev64.16 on the
;   register-built operand and on the result, and a vldrh.u16 for the stack load.
; - +mve.fp run: the checks expect a single vadd.f16 with no library calls.
140 define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
141 ; CHECK-MVE-LABEL: vector_add_f16:
142 ; CHECK-MVE: @ %bb.0: @ %entry
143 ; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
144 ; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
145 ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
146 ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
147 ; CHECK-MVE-NEXT: vmov d8, r0, r1
148 ; CHECK-MVE-NEXT: add r0, sp, #64
149 ; CHECK-MVE-NEXT: vldrw.u32 q6, [r0]
150 ; CHECK-MVE-NEXT: vmov d9, r2, r3
151 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[0]
152 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[0]
153 ; CHECK-MVE-NEXT: bl __aeabi_h2f
154 ; CHECK-MVE-NEXT: mov r5, r0
155 ; CHECK-MVE-NEXT: mov r0, r4
156 ; CHECK-MVE-NEXT: bl __aeabi_h2f
157 ; CHECK-MVE-NEXT: mov r1, r5
158 ; CHECK-MVE-NEXT: bl __aeabi_fadd
159 ; CHECK-MVE-NEXT: bl __aeabi_f2h
160 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0
161 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[1]
162 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[1]
163 ; CHECK-MVE-NEXT: bl __aeabi_h2f
164 ; CHECK-MVE-NEXT: mov r5, r0
165 ; CHECK-MVE-NEXT: mov r0, r4
166 ; CHECK-MVE-NEXT: bl __aeabi_h2f
167 ; CHECK-MVE-NEXT: mov r1, r5
168 ; CHECK-MVE-NEXT: bl __aeabi_fadd
169 ; CHECK-MVE-NEXT: bl __aeabi_f2h
170 ; CHECK-MVE-NEXT: vmov.16 q5[1], r0
171 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[2]
172 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[2]
173 ; CHECK-MVE-NEXT: bl __aeabi_h2f
174 ; CHECK-MVE-NEXT: mov r5, r0
175 ; CHECK-MVE-NEXT: mov r0, r4
176 ; CHECK-MVE-NEXT: bl __aeabi_h2f
177 ; CHECK-MVE-NEXT: mov r1, r5
178 ; CHECK-MVE-NEXT: bl __aeabi_fadd
179 ; CHECK-MVE-NEXT: bl __aeabi_f2h
180 ; CHECK-MVE-NEXT: vmov.16 q5[2], r0
181 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[3]
182 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[3]
183 ; CHECK-MVE-NEXT: bl __aeabi_h2f
184 ; CHECK-MVE-NEXT: mov r5, r0
185 ; CHECK-MVE-NEXT: mov r0, r4
186 ; CHECK-MVE-NEXT: bl __aeabi_h2f
187 ; CHECK-MVE-NEXT: mov r1, r5
188 ; CHECK-MVE-NEXT: bl __aeabi_fadd
189 ; CHECK-MVE-NEXT: bl __aeabi_f2h
190 ; CHECK-MVE-NEXT: vmov.16 q5[3], r0
191 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[4]
192 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[4]
193 ; CHECK-MVE-NEXT: bl __aeabi_h2f
194 ; CHECK-MVE-NEXT: mov r5, r0
195 ; CHECK-MVE-NEXT: mov r0, r4
196 ; CHECK-MVE-NEXT: bl __aeabi_h2f
197 ; CHECK-MVE-NEXT: mov r1, r5
198 ; CHECK-MVE-NEXT: bl __aeabi_fadd
199 ; CHECK-MVE-NEXT: bl __aeabi_f2h
200 ; CHECK-MVE-NEXT: vmov.16 q5[4], r0
201 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[5]
202 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[5]
203 ; CHECK-MVE-NEXT: bl __aeabi_h2f
204 ; CHECK-MVE-NEXT: mov r5, r0
205 ; CHECK-MVE-NEXT: mov r0, r4
206 ; CHECK-MVE-NEXT: bl __aeabi_h2f
207 ; CHECK-MVE-NEXT: mov r1, r5
208 ; CHECK-MVE-NEXT: bl __aeabi_fadd
209 ; CHECK-MVE-NEXT: bl __aeabi_f2h
210 ; CHECK-MVE-NEXT: vmov.16 q5[5], r0
211 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[6]
212 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[6]
213 ; CHECK-MVE-NEXT: bl __aeabi_h2f
214 ; CHECK-MVE-NEXT: mov r5, r0
215 ; CHECK-MVE-NEXT: mov r0, r4
216 ; CHECK-MVE-NEXT: bl __aeabi_h2f
217 ; CHECK-MVE-NEXT: mov r1, r5
218 ; CHECK-MVE-NEXT: bl __aeabi_fadd
219 ; CHECK-MVE-NEXT: bl __aeabi_f2h
220 ; CHECK-MVE-NEXT: vmov.16 q5[6], r0
221 ; CHECK-MVE-NEXT: vmov.u16 r0, q6[7]
222 ; CHECK-MVE-NEXT: vmov.u16 r4, q4[7]
223 ; CHECK-MVE-NEXT: bl __aeabi_h2f
224 ; CHECK-MVE-NEXT: mov r5, r0
225 ; CHECK-MVE-NEXT: mov r0, r4
226 ; CHECK-MVE-NEXT: bl __aeabi_h2f
227 ; CHECK-MVE-NEXT: mov r1, r5
228 ; CHECK-MVE-NEXT: bl __aeabi_fadd
229 ; CHECK-MVE-NEXT: bl __aeabi_f2h
230 ; CHECK-MVE-NEXT: vmov.16 q5[7], r0
231 ; CHECK-MVE-NEXT: vmov r0, r1, d10
232 ; CHECK-MVE-NEXT: vmov r2, r3, d11
233 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
234 ; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
236 ; CHECK-BE-LABEL: vector_add_f16:
237 ; CHECK-BE: @ %bb.0: @ %entry
238 ; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
239 ; CHECK-BE-NEXT: push {r4, r5, r7, lr}
240 ; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
241 ; CHECK-BE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
242 ; CHECK-BE-NEXT: vmov d0, r1, r0
243 ; CHECK-BE-NEXT: add r0, sp, #64
244 ; CHECK-BE-NEXT: vldrh.u16 q6, [r0]
245 ; CHECK-BE-NEXT: vmov d1, r3, r2
246 ; CHECK-BE-NEXT: vrev64.16 q4, q0
247 ; CHECK-BE-NEXT: vmov.u16 r0, q6[0]
248 ; CHECK-BE-NEXT: vmov.u16 r4, q4[0]
249 ; CHECK-BE-NEXT: bl __aeabi_h2f
250 ; CHECK-BE-NEXT: mov r5, r0
251 ; CHECK-BE-NEXT: mov r0, r4
252 ; CHECK-BE-NEXT: bl __aeabi_h2f
253 ; CHECK-BE-NEXT: mov r1, r5
254 ; CHECK-BE-NEXT: bl __aeabi_fadd
255 ; CHECK-BE-NEXT: bl __aeabi_f2h
256 ; CHECK-BE-NEXT: vmov.16 q5[0], r0
257 ; CHECK-BE-NEXT: vmov.u16 r0, q6[1]
258 ; CHECK-BE-NEXT: vmov.u16 r4, q4[1]
259 ; CHECK-BE-NEXT: bl __aeabi_h2f
260 ; CHECK-BE-NEXT: mov r5, r0
261 ; CHECK-BE-NEXT: mov r0, r4
262 ; CHECK-BE-NEXT: bl __aeabi_h2f
263 ; CHECK-BE-NEXT: mov r1, r5
264 ; CHECK-BE-NEXT: bl __aeabi_fadd
265 ; CHECK-BE-NEXT: bl __aeabi_f2h
266 ; CHECK-BE-NEXT: vmov.16 q5[1], r0
267 ; CHECK-BE-NEXT: vmov.u16 r0, q6[2]
268 ; CHECK-BE-NEXT: vmov.u16 r4, q4[2]
269 ; CHECK-BE-NEXT: bl __aeabi_h2f
270 ; CHECK-BE-NEXT: mov r5, r0
271 ; CHECK-BE-NEXT: mov r0, r4
272 ; CHECK-BE-NEXT: bl __aeabi_h2f
273 ; CHECK-BE-NEXT: mov r1, r5
274 ; CHECK-BE-NEXT: bl __aeabi_fadd
275 ; CHECK-BE-NEXT: bl __aeabi_f2h
276 ; CHECK-BE-NEXT: vmov.16 q5[2], r0
277 ; CHECK-BE-NEXT: vmov.u16 r0, q6[3]
278 ; CHECK-BE-NEXT: vmov.u16 r4, q4[3]
279 ; CHECK-BE-NEXT: bl __aeabi_h2f
280 ; CHECK-BE-NEXT: mov r5, r0
281 ; CHECK-BE-NEXT: mov r0, r4
282 ; CHECK-BE-NEXT: bl __aeabi_h2f
283 ; CHECK-BE-NEXT: mov r1, r5
284 ; CHECK-BE-NEXT: bl __aeabi_fadd
285 ; CHECK-BE-NEXT: bl __aeabi_f2h
286 ; CHECK-BE-NEXT: vmov.16 q5[3], r0
287 ; CHECK-BE-NEXT: vmov.u16 r0, q6[4]
288 ; CHECK-BE-NEXT: vmov.u16 r4, q4[4]
289 ; CHECK-BE-NEXT: bl __aeabi_h2f
290 ; CHECK-BE-NEXT: mov r5, r0
291 ; CHECK-BE-NEXT: mov r0, r4
292 ; CHECK-BE-NEXT: bl __aeabi_h2f
293 ; CHECK-BE-NEXT: mov r1, r5
294 ; CHECK-BE-NEXT: bl __aeabi_fadd
295 ; CHECK-BE-NEXT: bl __aeabi_f2h
296 ; CHECK-BE-NEXT: vmov.16 q5[4], r0
297 ; CHECK-BE-NEXT: vmov.u16 r0, q6[5]
298 ; CHECK-BE-NEXT: vmov.u16 r4, q4[5]
299 ; CHECK-BE-NEXT: bl __aeabi_h2f
300 ; CHECK-BE-NEXT: mov r5, r0
301 ; CHECK-BE-NEXT: mov r0, r4
302 ; CHECK-BE-NEXT: bl __aeabi_h2f
303 ; CHECK-BE-NEXT: mov r1, r5
304 ; CHECK-BE-NEXT: bl __aeabi_fadd
305 ; CHECK-BE-NEXT: bl __aeabi_f2h
306 ; CHECK-BE-NEXT: vmov.16 q5[5], r0
307 ; CHECK-BE-NEXT: vmov.u16 r0, q6[6]
308 ; CHECK-BE-NEXT: vmov.u16 r4, q4[6]
309 ; CHECK-BE-NEXT: bl __aeabi_h2f
310 ; CHECK-BE-NEXT: mov r5, r0
311 ; CHECK-BE-NEXT: mov r0, r4
312 ; CHECK-BE-NEXT: bl __aeabi_h2f
313 ; CHECK-BE-NEXT: mov r1, r5
314 ; CHECK-BE-NEXT: bl __aeabi_fadd
315 ; CHECK-BE-NEXT: bl __aeabi_f2h
316 ; CHECK-BE-NEXT: vmov.16 q5[6], r0
317 ; CHECK-BE-NEXT: vmov.u16 r0, q6[7]
318 ; CHECK-BE-NEXT: vmov.u16 r4, q4[7]
319 ; CHECK-BE-NEXT: bl __aeabi_h2f
320 ; CHECK-BE-NEXT: mov r5, r0
321 ; CHECK-BE-NEXT: mov r0, r4
322 ; CHECK-BE-NEXT: bl __aeabi_h2f
323 ; CHECK-BE-NEXT: mov r1, r5
324 ; CHECK-BE-NEXT: bl __aeabi_fadd
325 ; CHECK-BE-NEXT: bl __aeabi_f2h
326 ; CHECK-BE-NEXT: vmov.16 q5[7], r0
327 ; CHECK-BE-NEXT: vrev64.16 q0, q5
328 ; CHECK-BE-NEXT: vmov r1, r0, d0
329 ; CHECK-BE-NEXT: vmov r3, r2, d1
330 ; CHECK-BE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
331 ; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
333 ; CHECK-FP-LABEL: vector_add_f16:
334 ; CHECK-FP: @ %bb.0: @ %entry
335 ; CHECK-FP-NEXT: vmov d0, r0, r1
336 ; CHECK-FP-NEXT: mov r0, sp
337 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
338 ; CHECK-FP-NEXT: vmov d1, r2, r3
339 ; CHECK-FP-NEXT: vadd.f16 q0, q0, q1
340 ; CHECK-FP-NEXT: vmov r0, r1, d0
341 ; CHECK-FP-NEXT: vmov r2, r3, d1
342 ; CHECK-FP-NEXT: bx lr
344 %sum = fadd <8 x half> %lhs, %rhs
; Element-wise floating-point add of two <4 x float> vectors.
; - +mve little-endian run: the checks expect four __aeabi_fadd calls, one per
;   lane, with each result written into s16-s19 (q4) before the vector is moved
;   back to r0-r3.
; - big-endian run (also +mve): likewise four __aeabi_fadd calls, with a
;   vrev64.32 on the register-built operand and on the result.
; - +mve.fp run: the checks expect a single vadd.f32 with no library calls.
348 define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
349 ; CHECK-MVE-LABEL: vector_add_f32:
350 ; CHECK-MVE: @ %bb.0: @ %entry
351 ; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, lr}
352 ; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr}
353 ; CHECK-MVE-NEXT: .pad #4
354 ; CHECK-MVE-NEXT: sub sp, #4
355 ; CHECK-MVE-NEXT: .vsave {d8, d9}
356 ; CHECK-MVE-NEXT: vpush {d8, d9}
357 ; CHECK-MVE-NEXT: mov r4, r0
358 ; CHECK-MVE-NEXT: add r0, sp, #40
359 ; CHECK-MVE-NEXT: vldrw.u32 q4, [r0]
360 ; CHECK-MVE-NEXT: mov r6, r1
361 ; CHECK-MVE-NEXT: mov r0, r3
362 ; CHECK-MVE-NEXT: mov r5, r2
363 ; CHECK-MVE-NEXT: vmov r7, r1, d9
364 ; CHECK-MVE-NEXT: bl __aeabi_fadd
365 ; CHECK-MVE-NEXT: vmov s19, r0
366 ; CHECK-MVE-NEXT: mov r0, r5
367 ; CHECK-MVE-NEXT: mov r1, r7
368 ; CHECK-MVE-NEXT: bl __aeabi_fadd
369 ; CHECK-MVE-NEXT: vmov r5, r1, d8
370 ; CHECK-MVE-NEXT: vmov s18, r0
371 ; CHECK-MVE-NEXT: mov r0, r6
372 ; CHECK-MVE-NEXT: bl __aeabi_fadd
373 ; CHECK-MVE-NEXT: vmov s17, r0
374 ; CHECK-MVE-NEXT: mov r0, r4
375 ; CHECK-MVE-NEXT: mov r1, r5
376 ; CHECK-MVE-NEXT: bl __aeabi_fadd
377 ; CHECK-MVE-NEXT: vmov s16, r0
378 ; CHECK-MVE-NEXT: vmov r2, r3, d9
379 ; CHECK-MVE-NEXT: vmov r0, r1, d8
380 ; CHECK-MVE-NEXT: vpop {d8, d9}
381 ; CHECK-MVE-NEXT: add sp, #4
382 ; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc}
384 ; CHECK-BE-LABEL: vector_add_f32:
385 ; CHECK-BE: @ %bb.0: @ %entry
386 ; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
387 ; CHECK-BE-NEXT: push {r4, r5, r7, lr}
388 ; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11}
389 ; CHECK-BE-NEXT: vpush {d8, d9, d10, d11}
390 ; CHECK-BE-NEXT: vmov d0, r1, r0
391 ; CHECK-BE-NEXT: add r1, sp, #48
392 ; CHECK-BE-NEXT: vldrw.u32 q5, [r1]
393 ; CHECK-BE-NEXT: vmov d1, r3, r2
394 ; CHECK-BE-NEXT: vrev64.32 q4, q0
395 ; CHECK-BE-NEXT: vmov r4, r0, d9
396 ; CHECK-BE-NEXT: vmov r5, r1, d11
397 ; CHECK-BE-NEXT: bl __aeabi_fadd
398 ; CHECK-BE-NEXT: vmov s19, r0
399 ; CHECK-BE-NEXT: mov r0, r4
400 ; CHECK-BE-NEXT: mov r1, r5
401 ; CHECK-BE-NEXT: bl __aeabi_fadd
402 ; CHECK-BE-NEXT: vmov s18, r0
403 ; CHECK-BE-NEXT: vmov r4, r0, d8
404 ; CHECK-BE-NEXT: vmov r5, r1, d10
405 ; CHECK-BE-NEXT: bl __aeabi_fadd
406 ; CHECK-BE-NEXT: vmov s17, r0
407 ; CHECK-BE-NEXT: mov r0, r4
408 ; CHECK-BE-NEXT: mov r1, r5
409 ; CHECK-BE-NEXT: bl __aeabi_fadd
410 ; CHECK-BE-NEXT: vmov s16, r0
411 ; CHECK-BE-NEXT: vrev64.32 q0, q4
412 ; CHECK-BE-NEXT: vmov r1, r0, d0
413 ; CHECK-BE-NEXT: vmov r3, r2, d1
414 ; CHECK-BE-NEXT: vpop {d8, d9, d10, d11}
415 ; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
417 ; CHECK-FP-LABEL: vector_add_f32:
418 ; CHECK-FP: @ %bb.0: @ %entry
419 ; CHECK-FP-NEXT: vmov d0, r0, r1
420 ; CHECK-FP-NEXT: mov r0, sp
421 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
422 ; CHECK-FP-NEXT: vmov d1, r2, r3
423 ; CHECK-FP-NEXT: vadd.f32 q0, q0, q1
424 ; CHECK-FP-NEXT: vmov r0, r1, d0
425 ; CHECK-FP-NEXT: vmov r2, r3, d1
426 ; CHECK-FP-NEXT: bx lr
428 %sum = fadd <4 x float> %lhs, %rhs
; Element-wise floating-point add of two <2 x double> vectors.
; All three expected outputs contain two __aeabi_dadd calls (one per lane) and
; no vector add instruction, including the +mve.fp run.
; - +mve little-endian run: the stack operand is loaded into q4 and its halves
;   are moved to r2/r3 before each call; the first result is kept in r4/r6.
; - big-endian run: same structure, with the stack operand loaded via vldrb.u8
;   and passed through vrev64.8 before its halves are moved to r3/r2.
; - +mve.fp run: the first call uses the incoming r0/r1 directly, and the two
;   results are shuffled into r0-r3 after the second call.
432 define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
433 ; CHECK-MVE-LABEL: vector_add_f64:
434 ; CHECK-MVE: @ %bb.0: @ %entry
435 ; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, lr}
436 ; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr}
437 ; CHECK-MVE-NEXT: .pad #4
438 ; CHECK-MVE-NEXT: sub sp, #4
439 ; CHECK-MVE-NEXT: .vsave {d8, d9}
440 ; CHECK-MVE-NEXT: vpush {d8, d9}
441 ; CHECK-MVE-NEXT: mov r5, r0
442 ; CHECK-MVE-NEXT: add r0, sp, #40
443 ; CHECK-MVE-NEXT: vldrw.u32 q4, [r0]
444 ; CHECK-MVE-NEXT: mov r4, r2
445 ; CHECK-MVE-NEXT: mov r6, r3
446 ; CHECK-MVE-NEXT: mov r7, r1
447 ; CHECK-MVE-NEXT: vmov r2, r3, d9
448 ; CHECK-MVE-NEXT: mov r0, r4
449 ; CHECK-MVE-NEXT: mov r1, r6
450 ; CHECK-MVE-NEXT: bl __aeabi_dadd
451 ; CHECK-MVE-NEXT: vmov r2, r3, d8
452 ; CHECK-MVE-NEXT: mov r4, r0
453 ; CHECK-MVE-NEXT: mov r6, r1
454 ; CHECK-MVE-NEXT: mov r0, r5
455 ; CHECK-MVE-NEXT: mov r1, r7
456 ; CHECK-MVE-NEXT: bl __aeabi_dadd
457 ; CHECK-MVE-NEXT: mov r2, r4
458 ; CHECK-MVE-NEXT: mov r3, r6
459 ; CHECK-MVE-NEXT: vpop {d8, d9}
460 ; CHECK-MVE-NEXT: add sp, #4
461 ; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc}
463 ; CHECK-BE-LABEL: vector_add_f64:
464 ; CHECK-BE: @ %bb.0: @ %entry
465 ; CHECK-BE-NEXT: .save {r4, r5, r6, r7, lr}
466 ; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
467 ; CHECK-BE-NEXT: .pad #4
468 ; CHECK-BE-NEXT: sub sp, #4
469 ; CHECK-BE-NEXT: .vsave {d8, d9}
470 ; CHECK-BE-NEXT: vpush {d8, d9}
471 ; CHECK-BE-NEXT: mov r5, r0
472 ; CHECK-BE-NEXT: add r0, sp, #40
473 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
474 ; CHECK-BE-NEXT: mov r6, r2
475 ; CHECK-BE-NEXT: mov r4, r3
476 ; CHECK-BE-NEXT: mov r7, r1
477 ; CHECK-BE-NEXT: vrev64.8 q4, q0
478 ; CHECK-BE-NEXT: mov r0, r6
479 ; CHECK-BE-NEXT: vmov r3, r2, d9
480 ; CHECK-BE-NEXT: mov r1, r4
481 ; CHECK-BE-NEXT: bl __aeabi_dadd
482 ; CHECK-BE-NEXT: vmov r3, r2, d8
483 ; CHECK-BE-NEXT: mov r4, r0
484 ; CHECK-BE-NEXT: mov r6, r1
485 ; CHECK-BE-NEXT: mov r0, r5
486 ; CHECK-BE-NEXT: mov r1, r7
487 ; CHECK-BE-NEXT: bl __aeabi_dadd
488 ; CHECK-BE-NEXT: mov r2, r4
489 ; CHECK-BE-NEXT: mov r3, r6
490 ; CHECK-BE-NEXT: vpop {d8, d9}
491 ; CHECK-BE-NEXT: add sp, #4
492 ; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
494 ; CHECK-FP-LABEL: vector_add_f64:
495 ; CHECK-FP: @ %bb.0: @ %entry
496 ; CHECK-FP-NEXT: .save {r4, r5, r6, r7, lr}
497 ; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr}
498 ; CHECK-FP-NEXT: .pad #4
499 ; CHECK-FP-NEXT: sub sp, #4
500 ; CHECK-FP-NEXT: .vsave {d8, d9}
501 ; CHECK-FP-NEXT: vpush {d8, d9}
502 ; CHECK-FP-NEXT: mov r5, r2
503 ; CHECK-FP-NEXT: add r2, sp, #40
504 ; CHECK-FP-NEXT: vldrw.u32 q4, [r2]
505 ; CHECK-FP-NEXT: mov r4, r3
506 ; CHECK-FP-NEXT: vmov r2, r3, d8
507 ; CHECK-FP-NEXT: bl __aeabi_dadd
508 ; CHECK-FP-NEXT: vmov r2, r3, d9
509 ; CHECK-FP-NEXT: mov r6, r0
510 ; CHECK-FP-NEXT: mov r7, r1
511 ; CHECK-FP-NEXT: mov r0, r5
512 ; CHECK-FP-NEXT: mov r1, r4
513 ; CHECK-FP-NEXT: bl __aeabi_dadd
514 ; CHECK-FP-NEXT: mov r2, r0
515 ; CHECK-FP-NEXT: mov r3, r1
516 ; CHECK-FP-NEXT: mov r0, r6
517 ; CHECK-FP-NEXT: mov r1, r7
518 ; CHECK-FP-NEXT: vpop {d8, d9}
519 ; CHECK-FP-NEXT: add sp, #4
520 ; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc}
522 %sum = fadd <2 x double> %lhs, %rhs
523 ret <2 x double> %sum
526 define <4 x i32> @insertextract(i32 %x, i32 %y) {
527 ; CHECK-LE-LABEL: insertextract:
529 ; CHECK-LE-NEXT: mov r3, r1
530 ; CHECK-LE-NEXT: mov r1, r0
531 ; CHECK-LE-NEXT: mov r2, r0
532 ; CHECK-LE-NEXT: bx lr
534 ; CHECK-BE-LABEL: insertextract:
536 ; CHECK-BE-NEXT: mov r3, r1
537 ; CHECK-BE-NEXT: mov r1, r0
538 ; CHECK-BE-NEXT: mov r2, r0
539 ; CHECK-BE-NEXT: bx lr
540 %1 = insertelement <4 x i32> undef, i32 %x, i32 0
541 %2 = insertelement <4 x i32> %1, i32 %x, i32 1
542 %3 = insertelement <4 x i32> %2, i32 %x, i32 2
543 %4 = insertelement <4 x i32> %3, i32 %y, i32 3