; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD
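
; Bitconvert tests for big-endian ARM. Each function performs an arithmetic
; operation on its argument, bitcasts the result to another type of the same
; width, performs a second operation, and returns that. NEON registers hold
; vector elements in ascending-lane order regardless of endianness, so on a
; big-endian target a bitcast that changes the element size must be lowered
; to a VREV that reorders the elements within the register. The SOFT runs use
; the soft-float ABI (arguments and results in core registers); the HARD runs
; pass floating-point and vector values directly in d/q registers.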

define i64 @test_i64_f64(double %p) {
; SOFT-LABEL: test_i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}
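
; An i64 result is returned in r0/r1, so the final i64 add is done with an
; adds/adc pair in core registers rather than with a NEON vadd.i64.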

define i64 @test_i64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_i64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}
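
; Under the soft-float ABI an f64 result is returned in r0/r1 with the high
; word in r0, which is why the SOFT checks move results out with
; vmov r1, r0, d16.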

define double @test_f64_i64(i64 %p) {
; SOFT-LABEL: test_f64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_f64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_f64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_f64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_f64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_f64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define <1 x i64> @test_v1i64_i64(i64 %p) {
; SOFT-LABEL: test_v1i64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_f64(double %p) {
; SOFT-LABEL: test_v1i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v1i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v1i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v1i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v1i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <2 x float> @test_v2f32_i64(i64 %p) {
; SOFT-LABEL: test_v2f32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_f64(double %p) {
; SOFT-LABEL: test_v2f32_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2f32_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v2f32_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2f32_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2f32_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x i32> @test_v2i32_i64(i64 %p) {
; SOFT-LABEL: test_v2i32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_f64(double %p) {
; SOFT-LABEL: test_v2i32_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2i32_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v2i32_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2i32_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2i32_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <4 x i16> @test_v4i16_i64(i64 %p) {
; SOFT-LABEL: test_v4i16_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_f64(double %p) {
; SOFT-LABEL: test_v4i16_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v4i16_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v4i16_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v4i16_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v4i16_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev16.8 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev16.8 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <8 x i8> @test_v8i8_i64(i64 %p) {
; SOFT-LABEL: test_v8i8_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_f64(double %p) {
; SOFT-LABEL: test_v8i8_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v8i8_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v8i8_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v8i8_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v8i8_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev16.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev16.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}
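
; 128 bit conversions. fp128 arithmetic is not legal, so the fadd fp128 is
; lowered to a call to the __addtf3 libcall; fp128 values are passed and
; returned in r0-r3 under both ABIs.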

define fp128 @test_f128_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_f128_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_f128_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_f128_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_f128_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_f128_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_f128_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}
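
; Conversions with 128 bit vector results. Under the hard-float ABI the
; result is produced directly in q0; under the soft-float ABI it is moved
; back to r0-r3 through d16/d17.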

define <2 x double> @test_v2f64_f128(fp128 %p) {
; SOFT-LABEL: test_v2f64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v2f64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v2f64_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2f64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2f64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2f64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2f64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x i64> @test_v2i64_f128(fp128 %p) {
; SOFT-LABEL: test_v2i64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v2i64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v2i64_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vadd.i64 q8, q9, q9
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2i64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2i64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2i64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2i64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <4 x float> @test_v4f32_f128(fp128 %p) {
; SOFT-LABEL: test_v4f32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v4f32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4f32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v4f32_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v4f32_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v4f32_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v4f32_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x i32> @test_v4i32_f128(fp128 %p) {
; SOFT-LABEL: test_v4i32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v4i32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4i32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v4i32_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v4i32_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v4i32_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v4i32_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <8 x i16> @test_v8i16_f128(fp128 %p) {
; SOFT-LABEL: test_v8i16_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v8i16_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v8i16_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.16 q8, q9
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i16_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v8i16_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i16_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v8i16_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i16_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v8i16_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i16_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v8i16_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev16.8 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i16_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev16.8 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <16 x i8> @test_v16i8_f128(fp128 %p) {
; SOFT-LABEL: test_v16i8_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v16i8_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v16i8_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.8 q8, q9
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v16i8_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v16i8_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v16i8_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v16i8_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v16i8_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v16i8_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v16i8_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v16i8_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev16.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v16i8_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev16.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}