1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
3 ; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD
; Caller test, double argument / i64 result: loads a double from %p, adds it
; to itself (fadd) and passes it to @test_i64_f64_helper.
; Checked code: the SOFT variant moves the argument from d16 into r1/r0 with
; vmov, the HARD variant produces it directly in d0. After the bl, both
; variants expect adds/adc on r1/r0 followed by a strd to [r4].
5 declare i64 @test_i64_f64_helper(double %p)
6 define void @test_i64_f64(ptr %p, ptr %q) {
7 ; SOFT-LABEL: test_i64_f64:
9 ; SOFT-NEXT: .save {r4, lr}
10 ; SOFT-NEXT: push {r4, lr}
11 ; SOFT-NEXT: vldr d16, [r0]
12 ; SOFT-NEXT: mov r4, r1
13 ; SOFT-NEXT: vadd.f64 d16, d16, d16
14 ; SOFT-NEXT: vmov r1, r0, d16
15 ; SOFT-NEXT: bl test_i64_f64_helper
16 ; SOFT-NEXT: adds r1, r1, r1
17 ; SOFT-NEXT: adc r0, r0, r0
18 ; SOFT-NEXT: strd r0, r1, [r4]
19 ; SOFT-NEXT: pop {r4, pc}
21 ; HARD-LABEL: test_i64_f64:
23 ; HARD-NEXT: .save {r4, lr}
24 ; HARD-NEXT: push {r4, lr}
25 ; HARD-NEXT: vldr d16, [r0]
26 ; HARD-NEXT: mov r4, r1
27 ; HARD-NEXT: vadd.f64 d0, d16, d16
28 ; HARD-NEXT: bl test_i64_f64_helper
29 ; HARD-NEXT: adds r1, r1, r1
30 ; HARD-NEXT: adc r0, r0, r0
31 ; HARD-NEXT: strd r0, r1, [r4]
32 ; HARD-NEXT: pop {r4, pc}
33 %1 = load double, ptr %p
34 %2 = fadd double %1, %1
35 %3 = call i64 @test_i64_f64_helper(double %2)
; Caller test, <1 x i64> argument / i64 result: loads a <1 x i64> from %p,
; adds it to itself and passes it to @test_i64_v1i64_helper.
; Checked code: vadd.i64 with no vrev64; the SOFT variant moves d16 into r1/r0
; with vmov, the HARD variant writes the sum to d0. After the bl, both
; variants expect adds/adc on r1/r0 followed by a strd to [r4].
41 declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
42 define void @test_i64_v1i64(ptr %p, ptr %q) {
43 ; SOFT-LABEL: test_i64_v1i64:
45 ; SOFT-NEXT: .save {r4, lr}
46 ; SOFT-NEXT: push {r4, lr}
47 ; SOFT-NEXT: vldr d16, [r0]
48 ; SOFT-NEXT: mov r4, r1
49 ; SOFT-NEXT: vadd.i64 d16, d16, d16
50 ; SOFT-NEXT: vmov r1, r0, d16
51 ; SOFT-NEXT: bl test_i64_v1i64_helper
52 ; SOFT-NEXT: adds r1, r1, r1
53 ; SOFT-NEXT: adc r0, r0, r0
54 ; SOFT-NEXT: strd r0, r1, [r4]
55 ; SOFT-NEXT: pop {r4, pc}
57 ; HARD-LABEL: test_i64_v1i64:
59 ; HARD-NEXT: .save {r4, lr}
60 ; HARD-NEXT: push {r4, lr}
61 ; HARD-NEXT: vldr d16, [r0]
62 ; HARD-NEXT: mov r4, r1
63 ; HARD-NEXT: vadd.i64 d0, d16, d16
64 ; HARD-NEXT: bl test_i64_v1i64_helper
65 ; HARD-NEXT: adds r1, r1, r1
66 ; HARD-NEXT: adc r0, r0, r0
67 ; HARD-NEXT: strd r0, r1, [r4]
68 ; HARD-NEXT: pop {r4, pc}
69 %1 = load <1 x i64>, ptr %p
70 %2 = add <1 x i64> %1, %1
71 %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
; Caller test, <2 x float> argument / i64 result: loads a <2 x float> from %p,
; adds it to itself (fadd) and passes it to @test_i64_v2f32_helper.
; Checked code: the vadd.f32 is bracketed by vrev64.32 in both variants; the
; SOFT variant then moves d16 into r1/r0 with vmov, the HARD variant writes the
; second vrev64.32 result to d0. After the bl, both expect adds/adc on r1/r0
; followed by a strd to [r4].
77 declare i64 @test_i64_v2f32_helper(<2 x float> %p)
78 define void @test_i64_v2f32(ptr %p, ptr %q) {
79 ; SOFT-LABEL: test_i64_v2f32:
81 ; SOFT-NEXT: .save {r4, lr}
82 ; SOFT-NEXT: push {r4, lr}
83 ; SOFT-NEXT: vldr d16, [r0]
84 ; SOFT-NEXT: mov r4, r1
85 ; SOFT-NEXT: vrev64.32 d16, d16
86 ; SOFT-NEXT: vadd.f32 d16, d16, d16
87 ; SOFT-NEXT: vrev64.32 d16, d16
88 ; SOFT-NEXT: vmov r1, r0, d16
89 ; SOFT-NEXT: bl test_i64_v2f32_helper
90 ; SOFT-NEXT: adds r1, r1, r1
91 ; SOFT-NEXT: adc r0, r0, r0
92 ; SOFT-NEXT: strd r0, r1, [r4]
93 ; SOFT-NEXT: pop {r4, pc}
95 ; HARD-LABEL: test_i64_v2f32:
97 ; HARD-NEXT: .save {r4, lr}
98 ; HARD-NEXT: push {r4, lr}
99 ; HARD-NEXT: vldr d16, [r0]
100 ; HARD-NEXT: mov r4, r1
101 ; HARD-NEXT: vrev64.32 d16, d16
102 ; HARD-NEXT: vadd.f32 d16, d16, d16
103 ; HARD-NEXT: vrev64.32 d0, d16
104 ; HARD-NEXT: bl test_i64_v2f32_helper
105 ; HARD-NEXT: adds r1, r1, r1
106 ; HARD-NEXT: adc r0, r0, r0
107 ; HARD-NEXT: strd r0, r1, [r4]
108 ; HARD-NEXT: pop {r4, pc}
109 %1 = load <2 x float>, ptr %p
110 %2 = fadd <2 x float> %1, %1
111 %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
; Caller test, <2 x i32> argument / i64 result: loads a <2 x i32> from %p,
; adds it to itself and passes it to @test_i64_v2i32_helper.
; Checked code: the vadd.i32 is bracketed by vrev64.32 in both variants; the
; SOFT variant then moves d16 into r1/r0 with vmov, the HARD variant writes the
; second vrev64.32 result to d0. After the bl, both expect adds/adc on r1/r0
; followed by a strd to [r4].
117 declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
118 define void @test_i64_v2i32(ptr %p, ptr %q) {
119 ; SOFT-LABEL: test_i64_v2i32:
121 ; SOFT-NEXT: .save {r4, lr}
122 ; SOFT-NEXT: push {r4, lr}
123 ; SOFT-NEXT: vldr d16, [r0]
124 ; SOFT-NEXT: mov r4, r1
125 ; SOFT-NEXT: vrev64.32 d16, d16
126 ; SOFT-NEXT: vadd.i32 d16, d16, d16
127 ; SOFT-NEXT: vrev64.32 d16, d16
128 ; SOFT-NEXT: vmov r1, r0, d16
129 ; SOFT-NEXT: bl test_i64_v2i32_helper
130 ; SOFT-NEXT: adds r1, r1, r1
131 ; SOFT-NEXT: adc r0, r0, r0
132 ; SOFT-NEXT: strd r0, r1, [r4]
133 ; SOFT-NEXT: pop {r4, pc}
135 ; HARD-LABEL: test_i64_v2i32:
137 ; HARD-NEXT: .save {r4, lr}
138 ; HARD-NEXT: push {r4, lr}
139 ; HARD-NEXT: vldr d16, [r0]
140 ; HARD-NEXT: mov r4, r1
141 ; HARD-NEXT: vrev64.32 d16, d16
142 ; HARD-NEXT: vadd.i32 d16, d16, d16
143 ; HARD-NEXT: vrev64.32 d0, d16
144 ; HARD-NEXT: bl test_i64_v2i32_helper
145 ; HARD-NEXT: adds r1, r1, r1
146 ; HARD-NEXT: adc r0, r0, r0
147 ; HARD-NEXT: strd r0, r1, [r4]
148 ; HARD-NEXT: pop {r4, pc}
149 %1 = load <2 x i32>, ptr %p
150 %2 = add <2 x i32> %1, %1
151 %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
; Caller test, <4 x i16> argument / i64 result: loads a <4 x i16> from %p,
; adds it to itself and passes it to @test_i64_v4i16_helper.
; Checked code: the vadd.i16 is bracketed by vrev64.16 in both variants; the
; SOFT variant then moves d16 into r1/r0 with vmov, the HARD variant writes the
; second vrev64.16 result to d0. After the bl, both expect adds/adc on r1/r0
; followed by a strd to [r4].
157 declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
158 define void @test_i64_v4i16(ptr %p, ptr %q) {
159 ; SOFT-LABEL: test_i64_v4i16:
161 ; SOFT-NEXT: .save {r4, lr}
162 ; SOFT-NEXT: push {r4, lr}
163 ; SOFT-NEXT: vldr d16, [r0]
164 ; SOFT-NEXT: mov r4, r1
165 ; SOFT-NEXT: vrev64.16 d16, d16
166 ; SOFT-NEXT: vadd.i16 d16, d16, d16
167 ; SOFT-NEXT: vrev64.16 d16, d16
168 ; SOFT-NEXT: vmov r1, r0, d16
169 ; SOFT-NEXT: bl test_i64_v4i16_helper
170 ; SOFT-NEXT: adds r1, r1, r1
171 ; SOFT-NEXT: adc r0, r0, r0
172 ; SOFT-NEXT: strd r0, r1, [r4]
173 ; SOFT-NEXT: pop {r4, pc}
175 ; HARD-LABEL: test_i64_v4i16:
177 ; HARD-NEXT: .save {r4, lr}
178 ; HARD-NEXT: push {r4, lr}
179 ; HARD-NEXT: vldr d16, [r0]
180 ; HARD-NEXT: mov r4, r1
181 ; HARD-NEXT: vrev64.16 d16, d16
182 ; HARD-NEXT: vadd.i16 d16, d16, d16
183 ; HARD-NEXT: vrev64.16 d0, d16
184 ; HARD-NEXT: bl test_i64_v4i16_helper
185 ; HARD-NEXT: adds r1, r1, r1
186 ; HARD-NEXT: adc r0, r0, r0
187 ; HARD-NEXT: strd r0, r1, [r4]
188 ; HARD-NEXT: pop {r4, pc}
189 %1 = load <4 x i16>, ptr %p
190 %2 = add <4 x i16> %1, %1
191 %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
; Caller test, <8 x i8> argument / i64 result: loads an <8 x i8> from %p,
; adds it to itself and passes it to @test_i64_v8i8_helper.
; Checked code: the vadd.i8 is bracketed by vrev64.8 in both variants; the
; SOFT variant then moves d16 into r1/r0 with vmov, the HARD variant writes the
; second vrev64.8 result to d0. After the bl, both expect adds/adc on r1/r0
; followed by a strd to [r4].
197 declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
198 define void @test_i64_v8i8(ptr %p, ptr %q) {
199 ; SOFT-LABEL: test_i64_v8i8:
201 ; SOFT-NEXT: .save {r4, lr}
202 ; SOFT-NEXT: push {r4, lr}
203 ; SOFT-NEXT: vldr d16, [r0]
204 ; SOFT-NEXT: mov r4, r1
205 ; SOFT-NEXT: vrev64.8 d16, d16
206 ; SOFT-NEXT: vadd.i8 d16, d16, d16
207 ; SOFT-NEXT: vrev64.8 d16, d16
208 ; SOFT-NEXT: vmov r1, r0, d16
209 ; SOFT-NEXT: bl test_i64_v8i8_helper
210 ; SOFT-NEXT: adds r1, r1, r1
211 ; SOFT-NEXT: adc r0, r0, r0
212 ; SOFT-NEXT: strd r0, r1, [r4]
213 ; SOFT-NEXT: pop {r4, pc}
215 ; HARD-LABEL: test_i64_v8i8:
217 ; HARD-NEXT: .save {r4, lr}
218 ; HARD-NEXT: push {r4, lr}
219 ; HARD-NEXT: vldr d16, [r0]
220 ; HARD-NEXT: mov r4, r1
221 ; HARD-NEXT: vrev64.8 d16, d16
222 ; HARD-NEXT: vadd.i8 d16, d16, d16
223 ; HARD-NEXT: vrev64.8 d0, d16
224 ; HARD-NEXT: bl test_i64_v8i8_helper
225 ; HARD-NEXT: adds r1, r1, r1
226 ; HARD-NEXT: adc r0, r0, r0
227 ; HARD-NEXT: strd r0, r1, [r4]
228 ; HARD-NEXT: pop {r4, pc}
229 %1 = load <8 x i8>, ptr %p
230 %2 = add <8 x i8> %1, %1
231 %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
; Caller test, i64 argument / double result: loads an i64 from %p and calls
; @test_f64_i64_helper with %2; the returned double is added to itself (fadd)
; and stored to %q.
; Checked code: ldrd into r0/r1 followed by adds/adc before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.f64 and a vstr to [r4].
237 declare double @test_f64_i64_helper(i64 %p)
238 define void @test_f64_i64(ptr %p, ptr %q) {
239 ; SOFT-LABEL: test_f64_i64:
241 ; SOFT-NEXT: .save {r4, lr}
242 ; SOFT-NEXT: push {r4, lr}
243 ; SOFT-NEXT: mov r4, r1
244 ; SOFT-NEXT: ldrd r0, r1, [r0]
245 ; SOFT-NEXT: adds r1, r1, r1
246 ; SOFT-NEXT: adc r0, r0, r0
247 ; SOFT-NEXT: bl test_f64_i64_helper
248 ; SOFT-NEXT: vmov d16, r1, r0
249 ; SOFT-NEXT: vadd.f64 d16, d16, d16
250 ; SOFT-NEXT: vstr d16, [r4]
251 ; SOFT-NEXT: pop {r4, pc}
253 ; HARD-LABEL: test_f64_i64:
255 ; HARD-NEXT: .save {r4, lr}
256 ; HARD-NEXT: push {r4, lr}
257 ; HARD-NEXT: mov r4, r1
258 ; HARD-NEXT: ldrd r0, r1, [r0]
259 ; HARD-NEXT: adds r1, r1, r1
260 ; HARD-NEXT: adc r0, r0, r0
261 ; HARD-NEXT: bl test_f64_i64_helper
262 ; HARD-NEXT: vadd.f64 d16, d0, d0
263 ; HARD-NEXT: vstr d16, [r4]
264 ; HARD-NEXT: pop {r4, pc}
265 %1 = load i64, ptr %p
267 %3 = call double @test_f64_i64_helper(i64 %2)
268 %4 = fadd double %3, %3
269 store double %4, ptr %q
; Caller test, <1 x i64> argument / double result: loads a <1 x i64> from %p,
; adds it to itself, passes it to @test_f64_v1i64_helper, then adds the
; returned double to itself (fadd) and stores it to %q.
; Checked code: vadd.i64 with no vrev64 before the bl (the SOFT variant also
; moves d16 into r1/r0, the HARD variant targets d0). After the call the SOFT
; variant rebuilds d16 from r1/r0 while the HARD variant reads d0; both then
; expect vadd.f64 and a vstr to [r4].
273 declare double @test_f64_v1i64_helper(<1 x i64> %p)
274 define void @test_f64_v1i64(ptr %p, ptr %q) {
275 ; SOFT-LABEL: test_f64_v1i64:
277 ; SOFT-NEXT: .save {r4, lr}
278 ; SOFT-NEXT: push {r4, lr}
279 ; SOFT-NEXT: vldr d16, [r0]
280 ; SOFT-NEXT: mov r4, r1
281 ; SOFT-NEXT: vadd.i64 d16, d16, d16
282 ; SOFT-NEXT: vmov r1, r0, d16
283 ; SOFT-NEXT: bl test_f64_v1i64_helper
284 ; SOFT-NEXT: vmov d16, r1, r0
285 ; SOFT-NEXT: vadd.f64 d16, d16, d16
286 ; SOFT-NEXT: vstr d16, [r4]
287 ; SOFT-NEXT: pop {r4, pc}
289 ; HARD-LABEL: test_f64_v1i64:
291 ; HARD-NEXT: .save {r4, lr}
292 ; HARD-NEXT: push {r4, lr}
293 ; HARD-NEXT: vldr d16, [r0]
294 ; HARD-NEXT: mov r4, r1
295 ; HARD-NEXT: vadd.i64 d0, d16, d16
296 ; HARD-NEXT: bl test_f64_v1i64_helper
297 ; HARD-NEXT: vadd.f64 d16, d0, d0
298 ; HARD-NEXT: vstr d16, [r4]
299 ; HARD-NEXT: pop {r4, pc}
300 %1 = load <1 x i64>, ptr %p
301 %2 = add <1 x i64> %1, %1
302 %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
303 %4 = fadd double %3, %3
304 store double %4, ptr %q
; Caller test, <2 x float> argument / double result: loads a <2 x float> from
; %p, adds it to itself (fadd), passes it to @test_f64_v2f32_helper, then adds
; the returned double to itself and stores it to %q.
; Checked code: the vadd.f32 is bracketed by vrev64.32 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.f64 and a vstr to [r4].
308 declare double @test_f64_v2f32_helper(<2 x float> %p)
309 define void @test_f64_v2f32(ptr %p, ptr %q) {
310 ; SOFT-LABEL: test_f64_v2f32:
312 ; SOFT-NEXT: .save {r4, lr}
313 ; SOFT-NEXT: push {r4, lr}
314 ; SOFT-NEXT: vldr d16, [r0]
315 ; SOFT-NEXT: mov r4, r1
316 ; SOFT-NEXT: vrev64.32 d16, d16
317 ; SOFT-NEXT: vadd.f32 d16, d16, d16
318 ; SOFT-NEXT: vrev64.32 d16, d16
319 ; SOFT-NEXT: vmov r1, r0, d16
320 ; SOFT-NEXT: bl test_f64_v2f32_helper
321 ; SOFT-NEXT: vmov d16, r1, r0
322 ; SOFT-NEXT: vadd.f64 d16, d16, d16
323 ; SOFT-NEXT: vstr d16, [r4]
324 ; SOFT-NEXT: pop {r4, pc}
326 ; HARD-LABEL: test_f64_v2f32:
328 ; HARD-NEXT: .save {r4, lr}
329 ; HARD-NEXT: push {r4, lr}
330 ; HARD-NEXT: vldr d16, [r0]
331 ; HARD-NEXT: mov r4, r1
332 ; HARD-NEXT: vrev64.32 d16, d16
333 ; HARD-NEXT: vadd.f32 d16, d16, d16
334 ; HARD-NEXT: vrev64.32 d0, d16
335 ; HARD-NEXT: bl test_f64_v2f32_helper
336 ; HARD-NEXT: vadd.f64 d16, d0, d0
337 ; HARD-NEXT: vstr d16, [r4]
338 ; HARD-NEXT: pop {r4, pc}
339 %1 = load <2 x float>, ptr %p
340 %2 = fadd <2 x float> %1, %1
341 %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
342 %4 = fadd double %3, %3
343 store double %4, ptr %q
; Caller test, <2 x i32> argument / double result: loads a <2 x i32> from %p,
; adds it to itself, passes it to @test_f64_v2i32_helper, then adds the
; returned double to itself (fadd) and stores it to %q.
; Checked code: the vadd.i32 is bracketed by vrev64.32 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.f64 and a vstr to [r4].
347 declare double @test_f64_v2i32_helper(<2 x i32> %p)
348 define void @test_f64_v2i32(ptr %p, ptr %q) {
349 ; SOFT-LABEL: test_f64_v2i32:
351 ; SOFT-NEXT: .save {r4, lr}
352 ; SOFT-NEXT: push {r4, lr}
353 ; SOFT-NEXT: vldr d16, [r0]
354 ; SOFT-NEXT: mov r4, r1
355 ; SOFT-NEXT: vrev64.32 d16, d16
356 ; SOFT-NEXT: vadd.i32 d16, d16, d16
357 ; SOFT-NEXT: vrev64.32 d16, d16
358 ; SOFT-NEXT: vmov r1, r0, d16
359 ; SOFT-NEXT: bl test_f64_v2i32_helper
360 ; SOFT-NEXT: vmov d16, r1, r0
361 ; SOFT-NEXT: vadd.f64 d16, d16, d16
362 ; SOFT-NEXT: vstr d16, [r4]
363 ; SOFT-NEXT: pop {r4, pc}
365 ; HARD-LABEL: test_f64_v2i32:
367 ; HARD-NEXT: .save {r4, lr}
368 ; HARD-NEXT: push {r4, lr}
369 ; HARD-NEXT: vldr d16, [r0]
370 ; HARD-NEXT: mov r4, r1
371 ; HARD-NEXT: vrev64.32 d16, d16
372 ; HARD-NEXT: vadd.i32 d16, d16, d16
373 ; HARD-NEXT: vrev64.32 d0, d16
374 ; HARD-NEXT: bl test_f64_v2i32_helper
375 ; HARD-NEXT: vadd.f64 d16, d0, d0
376 ; HARD-NEXT: vstr d16, [r4]
377 ; HARD-NEXT: pop {r4, pc}
378 %1 = load <2 x i32>, ptr %p
379 %2 = add <2 x i32> %1, %1
380 %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
381 %4 = fadd double %3, %3
382 store double %4, ptr %q
; Caller test, <4 x i16> argument / double result: loads a <4 x i16> from %p,
; adds it to itself, passes it to @test_f64_v4i16_helper, then adds the
; returned double to itself (fadd) and stores it to %q.
; Checked code: the vadd.i16 is bracketed by vrev64.16 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.f64 and a vstr to [r4].
386 declare double @test_f64_v4i16_helper(<4 x i16> %p)
387 define void @test_f64_v4i16(ptr %p, ptr %q) {
388 ; SOFT-LABEL: test_f64_v4i16:
390 ; SOFT-NEXT: .save {r4, lr}
391 ; SOFT-NEXT: push {r4, lr}
392 ; SOFT-NEXT: vldr d16, [r0]
393 ; SOFT-NEXT: mov r4, r1
394 ; SOFT-NEXT: vrev64.16 d16, d16
395 ; SOFT-NEXT: vadd.i16 d16, d16, d16
396 ; SOFT-NEXT: vrev64.16 d16, d16
397 ; SOFT-NEXT: vmov r1, r0, d16
398 ; SOFT-NEXT: bl test_f64_v4i16_helper
399 ; SOFT-NEXT: vmov d16, r1, r0
400 ; SOFT-NEXT: vadd.f64 d16, d16, d16
401 ; SOFT-NEXT: vstr d16, [r4]
402 ; SOFT-NEXT: pop {r4, pc}
404 ; HARD-LABEL: test_f64_v4i16:
406 ; HARD-NEXT: .save {r4, lr}
407 ; HARD-NEXT: push {r4, lr}
408 ; HARD-NEXT: vldr d16, [r0]
409 ; HARD-NEXT: mov r4, r1
410 ; HARD-NEXT: vrev64.16 d16, d16
411 ; HARD-NEXT: vadd.i16 d16, d16, d16
412 ; HARD-NEXT: vrev64.16 d0, d16
413 ; HARD-NEXT: bl test_f64_v4i16_helper
414 ; HARD-NEXT: vadd.f64 d16, d0, d0
415 ; HARD-NEXT: vstr d16, [r4]
416 ; HARD-NEXT: pop {r4, pc}
417 %1 = load <4 x i16>, ptr %p
418 %2 = add <4 x i16> %1, %1
419 %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
420 %4 = fadd double %3, %3
421 store double %4, ptr %q
; Caller test, <8 x i8> argument / double result: loads an <8 x i8> from %p,
; adds it to itself, passes it to @test_f64_v8i8_helper, then adds the
; returned double to itself (fadd) and stores it to %q.
; Checked code: the vadd.i8 is bracketed by vrev64.8 before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.f64 and a vstr to [r4].
425 declare double @test_f64_v8i8_helper(<8 x i8> %p)
426 define void @test_f64_v8i8(ptr %p, ptr %q) {
427 ; SOFT-LABEL: test_f64_v8i8:
429 ; SOFT-NEXT: .save {r4, lr}
430 ; SOFT-NEXT: push {r4, lr}
431 ; SOFT-NEXT: vldr d16, [r0]
432 ; SOFT-NEXT: mov r4, r1
433 ; SOFT-NEXT: vrev64.8 d16, d16
434 ; SOFT-NEXT: vadd.i8 d16, d16, d16
435 ; SOFT-NEXT: vrev64.8 d16, d16
436 ; SOFT-NEXT: vmov r1, r0, d16
437 ; SOFT-NEXT: bl test_f64_v8i8_helper
438 ; SOFT-NEXT: vmov d16, r1, r0
439 ; SOFT-NEXT: vadd.f64 d16, d16, d16
440 ; SOFT-NEXT: vstr d16, [r4]
441 ; SOFT-NEXT: pop {r4, pc}
443 ; HARD-LABEL: test_f64_v8i8:
445 ; HARD-NEXT: .save {r4, lr}
446 ; HARD-NEXT: push {r4, lr}
447 ; HARD-NEXT: vldr d16, [r0]
448 ; HARD-NEXT: mov r4, r1
449 ; HARD-NEXT: vrev64.8 d16, d16
450 ; HARD-NEXT: vadd.i8 d16, d16, d16
451 ; HARD-NEXT: vrev64.8 d0, d16
452 ; HARD-NEXT: bl test_f64_v8i8_helper
453 ; HARD-NEXT: vadd.f64 d16, d0, d0
454 ; HARD-NEXT: vstr d16, [r4]
455 ; HARD-NEXT: pop {r4, pc}
456 %1 = load <8 x i8>, ptr %p
457 %2 = add <8 x i8> %1, %1
458 %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
459 %4 = fadd double %3, %3
460 store double %4, ptr %q
; Caller test, i64 argument / <1 x i64> result: loads an i64 from %p and calls
; @test_v1i64_i64_helper with %2; the returned <1 x i64> is added to itself
; and stored to %q.
; Checked code: ldrd into r0/r1 followed by adds/adc before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.i64 (no vrev64) and a vstr to [r4].
464 declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
465 define void @test_v1i64_i64(ptr %p, ptr %q) {
466 ; SOFT-LABEL: test_v1i64_i64:
468 ; SOFT-NEXT: .save {r4, lr}
469 ; SOFT-NEXT: push {r4, lr}
470 ; SOFT-NEXT: mov r4, r1
471 ; SOFT-NEXT: ldrd r0, r1, [r0]
472 ; SOFT-NEXT: adds r1, r1, r1
473 ; SOFT-NEXT: adc r0, r0, r0
474 ; SOFT-NEXT: bl test_v1i64_i64_helper
475 ; SOFT-NEXT: vmov d16, r1, r0
476 ; SOFT-NEXT: vadd.i64 d16, d16, d16
477 ; SOFT-NEXT: vstr d16, [r4]
478 ; SOFT-NEXT: pop {r4, pc}
480 ; HARD-LABEL: test_v1i64_i64:
482 ; HARD-NEXT: .save {r4, lr}
483 ; HARD-NEXT: push {r4, lr}
484 ; HARD-NEXT: mov r4, r1
485 ; HARD-NEXT: ldrd r0, r1, [r0]
486 ; HARD-NEXT: adds r1, r1, r1
487 ; HARD-NEXT: adc r0, r0, r0
488 ; HARD-NEXT: bl test_v1i64_i64_helper
489 ; HARD-NEXT: vadd.i64 d16, d0, d0
490 ; HARD-NEXT: vstr d16, [r4]
491 ; HARD-NEXT: pop {r4, pc}
492 %1 = load i64, ptr %p
494 %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
495 %4 = add <1 x i64> %3, %3
496 store <1 x i64> %4, ptr %q
; Caller test, double argument / <1 x i64> result: loads a double from %p,
; adds it to itself (fadd), passes it to @test_v1i64_f64_helper, then adds the
; returned <1 x i64> to itself and stores it to %q.
; Checked code: vadd.f64 before the bl (the SOFT variant also moves d16 into
; r1/r0, the HARD variant targets d0). After the call the SOFT variant
; rebuilds d16 from r1/r0 while the HARD variant reads d0; both then expect
; vadd.i64 and a vstr to [r4].
500 declare <1 x i64> @test_v1i64_f64_helper(double %p)
501 define void @test_v1i64_f64(ptr %p, ptr %q) {
502 ; SOFT-LABEL: test_v1i64_f64:
504 ; SOFT-NEXT: .save {r4, lr}
505 ; SOFT-NEXT: push {r4, lr}
506 ; SOFT-NEXT: vldr d16, [r0]
507 ; SOFT-NEXT: mov r4, r1
508 ; SOFT-NEXT: vadd.f64 d16, d16, d16
509 ; SOFT-NEXT: vmov r1, r0, d16
510 ; SOFT-NEXT: bl test_v1i64_f64_helper
511 ; SOFT-NEXT: vmov d16, r1, r0
512 ; SOFT-NEXT: vadd.i64 d16, d16, d16
513 ; SOFT-NEXT: vstr d16, [r4]
514 ; SOFT-NEXT: pop {r4, pc}
516 ; HARD-LABEL: test_v1i64_f64:
518 ; HARD-NEXT: .save {r4, lr}
519 ; HARD-NEXT: push {r4, lr}
520 ; HARD-NEXT: vldr d16, [r0]
521 ; HARD-NEXT: mov r4, r1
522 ; HARD-NEXT: vadd.f64 d0, d16, d16
523 ; HARD-NEXT: bl test_v1i64_f64_helper
524 ; HARD-NEXT: vadd.i64 d16, d0, d0
525 ; HARD-NEXT: vstr d16, [r4]
526 ; HARD-NEXT: pop {r4, pc}
527 %1 = load double, ptr %p
528 %2 = fadd double %1, %1
529 %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
530 %4 = add <1 x i64> %3, %3
531 store <1 x i64> %4, ptr %q
; Caller test, <2 x float> argument / <1 x i64> result: loads a <2 x float>
; from %p, adds it to itself (fadd), passes it to @test_v1i64_v2f32_helper,
; then adds the returned <1 x i64> to itself and stores it to %q.
; Checked code: the vadd.f32 is bracketed by vrev64.32 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.i64 and a vstr to [r4].
535 declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
536 define void @test_v1i64_v2f32(ptr %p, ptr %q) {
537 ; SOFT-LABEL: test_v1i64_v2f32:
539 ; SOFT-NEXT: .save {r4, lr}
540 ; SOFT-NEXT: push {r4, lr}
541 ; SOFT-NEXT: vldr d16, [r0]
542 ; SOFT-NEXT: mov r4, r1
543 ; SOFT-NEXT: vrev64.32 d16, d16
544 ; SOFT-NEXT: vadd.f32 d16, d16, d16
545 ; SOFT-NEXT: vrev64.32 d16, d16
546 ; SOFT-NEXT: vmov r1, r0, d16
547 ; SOFT-NEXT: bl test_v1i64_v2f32_helper
548 ; SOFT-NEXT: vmov d16, r1, r0
549 ; SOFT-NEXT: vadd.i64 d16, d16, d16
550 ; SOFT-NEXT: vstr d16, [r4]
551 ; SOFT-NEXT: pop {r4, pc}
553 ; HARD-LABEL: test_v1i64_v2f32:
555 ; HARD-NEXT: .save {r4, lr}
556 ; HARD-NEXT: push {r4, lr}
557 ; HARD-NEXT: vldr d16, [r0]
558 ; HARD-NEXT: mov r4, r1
559 ; HARD-NEXT: vrev64.32 d16, d16
560 ; HARD-NEXT: vadd.f32 d16, d16, d16
561 ; HARD-NEXT: vrev64.32 d0, d16
562 ; HARD-NEXT: bl test_v1i64_v2f32_helper
563 ; HARD-NEXT: vadd.i64 d16, d0, d0
564 ; HARD-NEXT: vstr d16, [r4]
565 ; HARD-NEXT: pop {r4, pc}
566 %1 = load <2 x float>, ptr %p
567 %2 = fadd <2 x float> %1, %1
568 %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
569 %4 = add <1 x i64> %3, %3
570 store <1 x i64> %4, ptr %q
; Caller test, <2 x i32> argument / <1 x i64> result: loads a <2 x i32> from
; %p, adds it to itself, passes it to @test_v1i64_v2i32_helper, then adds the
; returned <1 x i64> to itself and stores it to %q.
; Checked code: the vadd.i32 is bracketed by vrev64.32 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.i64 and a vstr to [r4].
574 declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
575 define void @test_v1i64_v2i32(ptr %p, ptr %q) {
576 ; SOFT-LABEL: test_v1i64_v2i32:
578 ; SOFT-NEXT: .save {r4, lr}
579 ; SOFT-NEXT: push {r4, lr}
580 ; SOFT-NEXT: vldr d16, [r0]
581 ; SOFT-NEXT: mov r4, r1
582 ; SOFT-NEXT: vrev64.32 d16, d16
583 ; SOFT-NEXT: vadd.i32 d16, d16, d16
584 ; SOFT-NEXT: vrev64.32 d16, d16
585 ; SOFT-NEXT: vmov r1, r0, d16
586 ; SOFT-NEXT: bl test_v1i64_v2i32_helper
587 ; SOFT-NEXT: vmov d16, r1, r0
588 ; SOFT-NEXT: vadd.i64 d16, d16, d16
589 ; SOFT-NEXT: vstr d16, [r4]
590 ; SOFT-NEXT: pop {r4, pc}
592 ; HARD-LABEL: test_v1i64_v2i32:
594 ; HARD-NEXT: .save {r4, lr}
595 ; HARD-NEXT: push {r4, lr}
596 ; HARD-NEXT: vldr d16, [r0]
597 ; HARD-NEXT: mov r4, r1
598 ; HARD-NEXT: vrev64.32 d16, d16
599 ; HARD-NEXT: vadd.i32 d16, d16, d16
600 ; HARD-NEXT: vrev64.32 d0, d16
601 ; HARD-NEXT: bl test_v1i64_v2i32_helper
602 ; HARD-NEXT: vadd.i64 d16, d0, d0
603 ; HARD-NEXT: vstr d16, [r4]
604 ; HARD-NEXT: pop {r4, pc}
605 %1 = load <2 x i32>, ptr %p
606 %2 = add <2 x i32> %1, %1
607 %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
608 %4 = add <1 x i64> %3, %3
609 store <1 x i64> %4, ptr %q
; Caller test, <4 x i16> argument / <1 x i64> result: loads a <4 x i16> from
; %p, adds it to itself, passes it to @test_v1i64_v4i16_helper, then adds the
; returned <1 x i64> to itself and stores it to %q.
; Checked code: the vadd.i16 is bracketed by vrev64.16 before the bl. After
; the call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.i64 and a vstr to [r4].
613 declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
614 define void @test_v1i64_v4i16(ptr %p, ptr %q) {
615 ; SOFT-LABEL: test_v1i64_v4i16:
617 ; SOFT-NEXT: .save {r4, lr}
618 ; SOFT-NEXT: push {r4, lr}
619 ; SOFT-NEXT: vldr d16, [r0]
620 ; SOFT-NEXT: mov r4, r1
621 ; SOFT-NEXT: vrev64.16 d16, d16
622 ; SOFT-NEXT: vadd.i16 d16, d16, d16
623 ; SOFT-NEXT: vrev64.16 d16, d16
624 ; SOFT-NEXT: vmov r1, r0, d16
625 ; SOFT-NEXT: bl test_v1i64_v4i16_helper
626 ; SOFT-NEXT: vmov d16, r1, r0
627 ; SOFT-NEXT: vadd.i64 d16, d16, d16
628 ; SOFT-NEXT: vstr d16, [r4]
629 ; SOFT-NEXT: pop {r4, pc}
631 ; HARD-LABEL: test_v1i64_v4i16:
633 ; HARD-NEXT: .save {r4, lr}
634 ; HARD-NEXT: push {r4, lr}
635 ; HARD-NEXT: vldr d16, [r0]
636 ; HARD-NEXT: mov r4, r1
637 ; HARD-NEXT: vrev64.16 d16, d16
638 ; HARD-NEXT: vadd.i16 d16, d16, d16
639 ; HARD-NEXT: vrev64.16 d0, d16
640 ; HARD-NEXT: bl test_v1i64_v4i16_helper
641 ; HARD-NEXT: vadd.i64 d16, d0, d0
642 ; HARD-NEXT: vstr d16, [r4]
643 ; HARD-NEXT: pop {r4, pc}
644 %1 = load <4 x i16>, ptr %p
645 %2 = add <4 x i16> %1, %1
646 %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
647 %4 = add <1 x i64> %3, %3
648 store <1 x i64> %4, ptr %q
; Caller test, <8 x i8> argument / <1 x i64> result: loads an <8 x i8> from
; %p, adds it to itself, passes it to @test_v1i64_v8i8_helper, then adds the
; returned <1 x i64> to itself and stores it to %q.
; Checked code: the vadd.i8 is bracketed by vrev64.8 before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vadd.i64 and a vstr to [r4].
652 declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
653 define void @test_v1i64_v8i8(ptr %p, ptr %q) {
654 ; SOFT-LABEL: test_v1i64_v8i8:
656 ; SOFT-NEXT: .save {r4, lr}
657 ; SOFT-NEXT: push {r4, lr}
658 ; SOFT-NEXT: vldr d16, [r0]
659 ; SOFT-NEXT: mov r4, r1
660 ; SOFT-NEXT: vrev64.8 d16, d16
661 ; SOFT-NEXT: vadd.i8 d16, d16, d16
662 ; SOFT-NEXT: vrev64.8 d16, d16
663 ; SOFT-NEXT: vmov r1, r0, d16
664 ; SOFT-NEXT: bl test_v1i64_v8i8_helper
665 ; SOFT-NEXT: vmov d16, r1, r0
666 ; SOFT-NEXT: vadd.i64 d16, d16, d16
667 ; SOFT-NEXT: vstr d16, [r4]
668 ; SOFT-NEXT: pop {r4, pc}
670 ; HARD-LABEL: test_v1i64_v8i8:
672 ; HARD-NEXT: .save {r4, lr}
673 ; HARD-NEXT: push {r4, lr}
674 ; HARD-NEXT: vldr d16, [r0]
675 ; HARD-NEXT: mov r4, r1
676 ; HARD-NEXT: vrev64.8 d16, d16
677 ; HARD-NEXT: vadd.i8 d16, d16, d16
678 ; HARD-NEXT: vrev64.8 d0, d16
679 ; HARD-NEXT: bl test_v1i64_v8i8_helper
680 ; HARD-NEXT: vadd.i64 d16, d0, d0
681 ; HARD-NEXT: vstr d16, [r4]
682 ; HARD-NEXT: pop {r4, pc}
683 %1 = load <8 x i8>, ptr %p
684 %2 = add <8 x i8> %1, %1
685 %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
686 %4 = add <1 x i64> %3, %3
687 store <1 x i64> %4, ptr %q
; Caller test, i64 argument / <2 x float> result: loads an i64 from %p and
; calls @test_v2f32_i64_helper with %2; the returned <2 x float> is added to
; itself (fadd) and stored to %q.
; Checked code: ldrd into r0/r1 followed by adds/adc before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vrev64.32, vadd.f32, vrev64.32 and a
; vstr to [r4].
691 declare <2 x float> @test_v2f32_i64_helper(i64 %p)
692 define void @test_v2f32_i64(ptr %p, ptr %q) {
693 ; SOFT-LABEL: test_v2f32_i64:
695 ; SOFT-NEXT: .save {r4, lr}
696 ; SOFT-NEXT: push {r4, lr}
697 ; SOFT-NEXT: mov r4, r1
698 ; SOFT-NEXT: ldrd r0, r1, [r0]
699 ; SOFT-NEXT: adds r1, r1, r1
700 ; SOFT-NEXT: adc r0, r0, r0
701 ; SOFT-NEXT: bl test_v2f32_i64_helper
702 ; SOFT-NEXT: vmov d16, r1, r0
703 ; SOFT-NEXT: vrev64.32 d16, d16
704 ; SOFT-NEXT: vadd.f32 d16, d16, d16
705 ; SOFT-NEXT: vrev64.32 d16, d16
706 ; SOFT-NEXT: vstr d16, [r4]
707 ; SOFT-NEXT: pop {r4, pc}
709 ; HARD-LABEL: test_v2f32_i64:
711 ; HARD-NEXT: .save {r4, lr}
712 ; HARD-NEXT: push {r4, lr}
713 ; HARD-NEXT: mov r4, r1
714 ; HARD-NEXT: ldrd r0, r1, [r0]
715 ; HARD-NEXT: adds r1, r1, r1
716 ; HARD-NEXT: adc r0, r0, r0
717 ; HARD-NEXT: bl test_v2f32_i64_helper
718 ; HARD-NEXT: vrev64.32 d16, d0
719 ; HARD-NEXT: vadd.f32 d16, d16, d16
720 ; HARD-NEXT: vrev64.32 d16, d16
721 ; HARD-NEXT: vstr d16, [r4]
722 ; HARD-NEXT: pop {r4, pc}
723 %1 = load i64, ptr %p
725 %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
726 %4 = fadd <2 x float> %3, %3
727 store <2 x float> %4, ptr %q
; Caller test, double argument / <2 x float> result: loads a double from %p,
; adds it to itself (fadd), passes it to @test_v2f32_f64_helper, then adds the
; returned <2 x float> to itself and stores it to %q.
; Checked code: vadd.f64 before the bl (the SOFT variant also moves d16 into
; r1/r0, the HARD variant targets d0). After the call the SOFT variant
; rebuilds d16 from r1/r0 while the HARD variant reads d0; both then expect
; vrev64.32, vadd.f32, vrev64.32 and a vstr to [r4].
731 declare <2 x float> @test_v2f32_f64_helper(double %p)
732 define void @test_v2f32_f64(ptr %p, ptr %q) {
733 ; SOFT-LABEL: test_v2f32_f64:
735 ; SOFT-NEXT: .save {r4, lr}
736 ; SOFT-NEXT: push {r4, lr}
737 ; SOFT-NEXT: vldr d16, [r0]
738 ; SOFT-NEXT: mov r4, r1
739 ; SOFT-NEXT: vadd.f64 d16, d16, d16
740 ; SOFT-NEXT: vmov r1, r0, d16
741 ; SOFT-NEXT: bl test_v2f32_f64_helper
742 ; SOFT-NEXT: vmov d16, r1, r0
743 ; SOFT-NEXT: vrev64.32 d16, d16
744 ; SOFT-NEXT: vadd.f32 d16, d16, d16
745 ; SOFT-NEXT: vrev64.32 d16, d16
746 ; SOFT-NEXT: vstr d16, [r4]
747 ; SOFT-NEXT: pop {r4, pc}
749 ; HARD-LABEL: test_v2f32_f64:
751 ; HARD-NEXT: .save {r4, lr}
752 ; HARD-NEXT: push {r4, lr}
753 ; HARD-NEXT: vldr d16, [r0]
754 ; HARD-NEXT: mov r4, r1
755 ; HARD-NEXT: vadd.f64 d0, d16, d16
756 ; HARD-NEXT: bl test_v2f32_f64_helper
757 ; HARD-NEXT: vrev64.32 d16, d0
758 ; HARD-NEXT: vadd.f32 d16, d16, d16
759 ; HARD-NEXT: vrev64.32 d16, d16
760 ; HARD-NEXT: vstr d16, [r4]
761 ; HARD-NEXT: pop {r4, pc}
762 %1 = load double, ptr %p
763 %2 = fadd double %1, %1
764 %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
765 %4 = fadd <2 x float> %3, %3
766 store <2 x float> %4, ptr %q
; Caller test, <1 x i64> argument / <2 x float> result: loads a <1 x i64> from
; %p, adds it to itself, passes it to @test_v2f32_v1i64_helper, then adds the
; returned <2 x float> to itself (fadd) and stores it to %q.
; Checked code: vadd.i64 with no vrev64 before the bl. After the call the SOFT
; variant rebuilds d16 from r1/r0 with vmov while the HARD variant reads d0;
; both then expect vrev64.32, vadd.f32, vrev64.32 and a vstr to [r4].
770 declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
771 define void @test_v2f32_v1i64(ptr %p, ptr %q) {
772 ; SOFT-LABEL: test_v2f32_v1i64:
774 ; SOFT-NEXT: .save {r4, lr}
775 ; SOFT-NEXT: push {r4, lr}
776 ; SOFT-NEXT: vldr d16, [r0]
777 ; SOFT-NEXT: mov r4, r1
778 ; SOFT-NEXT: vadd.i64 d16, d16, d16
779 ; SOFT-NEXT: vmov r1, r0, d16
780 ; SOFT-NEXT: bl test_v2f32_v1i64_helper
781 ; SOFT-NEXT: vmov d16, r1, r0
782 ; SOFT-NEXT: vrev64.32 d16, d16
783 ; SOFT-NEXT: vadd.f32 d16, d16, d16
784 ; SOFT-NEXT: vrev64.32 d16, d16
785 ; SOFT-NEXT: vstr d16, [r4]
786 ; SOFT-NEXT: pop {r4, pc}
788 ; HARD-LABEL: test_v2f32_v1i64:
790 ; HARD-NEXT: .save {r4, lr}
791 ; HARD-NEXT: push {r4, lr}
792 ; HARD-NEXT: vldr d16, [r0]
793 ; HARD-NEXT: mov r4, r1
794 ; HARD-NEXT: vadd.i64 d0, d16, d16
795 ; HARD-NEXT: bl test_v2f32_v1i64_helper
796 ; HARD-NEXT: vrev64.32 d16, d0
797 ; HARD-NEXT: vadd.f32 d16, d16, d16
798 ; HARD-NEXT: vrev64.32 d16, d16
799 ; HARD-NEXT: vstr d16, [r4]
800 ; HARD-NEXT: pop {r4, pc}
801 %1 = load <1 x i64>, ptr %p
802 %2 = add <1 x i64> %1, %1
803 %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
804 %4 = fadd <2 x float> %3, %3
805 store <2 x float> %4, ptr %q
; Caller test, <2 x i32> argument / <2 x float> result: loads a <2 x i32> from
; %p, adds it to itself, passes it to @test_v2f32_v2i32_helper, then adds the
; returned <2 x float> to itself (fadd) and stores it to %q.
; Checked code: the vadd.i32 before the bl and the vadd.f32 after it are each
; bracketed by vrev64.32. The SOFT variant passes and receives the value
; through r1/r0 (vmov), the HARD variant through d0; both end with a vstr to
; [r4].
809 declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
810 define void @test_v2f32_v2i32(ptr %p, ptr %q) {
811 ; SOFT-LABEL: test_v2f32_v2i32:
813 ; SOFT-NEXT: .save {r4, lr}
814 ; SOFT-NEXT: push {r4, lr}
815 ; SOFT-NEXT: vldr d16, [r0]
816 ; SOFT-NEXT: mov r4, r1
817 ; SOFT-NEXT: vrev64.32 d16, d16
818 ; SOFT-NEXT: vadd.i32 d16, d16, d16
819 ; SOFT-NEXT: vrev64.32 d16, d16
820 ; SOFT-NEXT: vmov r1, r0, d16
821 ; SOFT-NEXT: bl test_v2f32_v2i32_helper
822 ; SOFT-NEXT: vmov d16, r1, r0
823 ; SOFT-NEXT: vrev64.32 d16, d16
824 ; SOFT-NEXT: vadd.f32 d16, d16, d16
825 ; SOFT-NEXT: vrev64.32 d16, d16
826 ; SOFT-NEXT: vstr d16, [r4]
827 ; SOFT-NEXT: pop {r4, pc}
829 ; HARD-LABEL: test_v2f32_v2i32:
831 ; HARD-NEXT: .save {r4, lr}
832 ; HARD-NEXT: push {r4, lr}
833 ; HARD-NEXT: vldr d16, [r0]
834 ; HARD-NEXT: mov r4, r1
835 ; HARD-NEXT: vrev64.32 d16, d16
836 ; HARD-NEXT: vadd.i32 d16, d16, d16
837 ; HARD-NEXT: vrev64.32 d0, d16
838 ; HARD-NEXT: bl test_v2f32_v2i32_helper
839 ; HARD-NEXT: vrev64.32 d16, d0
840 ; HARD-NEXT: vadd.f32 d16, d16, d16
841 ; HARD-NEXT: vrev64.32 d16, d16
842 ; HARD-NEXT: vstr d16, [r4]
843 ; HARD-NEXT: pop {r4, pc}
844 %1 = load <2 x i32>, ptr %p
845 %2 = add <2 x i32> %1, %1
846 %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
847 %4 = fadd <2 x float> %3, %3
848 store <2 x float> %4, ptr %q
; Caller test, <4 x i16> argument / <2 x float> result: loads a <4 x i16> from
; %p, adds it to itself, passes it to @test_v2f32_v4i16_helper, then adds the
; returned <2 x float> to itself (fadd) and stores it to %q.
; Checked code: the vadd.i16 before the bl is bracketed by vrev64.16 and the
; vadd.f32 after it by vrev64.32. The SOFT variant passes and receives the
; value through r1/r0 (vmov), the HARD variant through d0; both end with a
; vstr to [r4].
852 declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
853 define void @test_v2f32_v4i16(ptr %p, ptr %q) {
854 ; SOFT-LABEL: test_v2f32_v4i16:
856 ; SOFT-NEXT: .save {r4, lr}
857 ; SOFT-NEXT: push {r4, lr}
858 ; SOFT-NEXT: vldr d16, [r0]
859 ; SOFT-NEXT: mov r4, r1
860 ; SOFT-NEXT: vrev64.16 d16, d16
861 ; SOFT-NEXT: vadd.i16 d16, d16, d16
862 ; SOFT-NEXT: vrev64.16 d16, d16
863 ; SOFT-NEXT: vmov r1, r0, d16
864 ; SOFT-NEXT: bl test_v2f32_v4i16_helper
865 ; SOFT-NEXT: vmov d16, r1, r0
866 ; SOFT-NEXT: vrev64.32 d16, d16
867 ; SOFT-NEXT: vadd.f32 d16, d16, d16
868 ; SOFT-NEXT: vrev64.32 d16, d16
869 ; SOFT-NEXT: vstr d16, [r4]
870 ; SOFT-NEXT: pop {r4, pc}
872 ; HARD-LABEL: test_v2f32_v4i16:
874 ; HARD-NEXT: .save {r4, lr}
875 ; HARD-NEXT: push {r4, lr}
876 ; HARD-NEXT: vldr d16, [r0]
877 ; HARD-NEXT: mov r4, r1
878 ; HARD-NEXT: vrev64.16 d16, d16
879 ; HARD-NEXT: vadd.i16 d16, d16, d16
880 ; HARD-NEXT: vrev64.16 d0, d16
881 ; HARD-NEXT: bl test_v2f32_v4i16_helper
882 ; HARD-NEXT: vrev64.32 d16, d0
883 ; HARD-NEXT: vadd.f32 d16, d16, d16
884 ; HARD-NEXT: vrev64.32 d16, d16
885 ; HARD-NEXT: vstr d16, [r4]
886 ; HARD-NEXT: pop {r4, pc}
887 %1 = load <4 x i16>, ptr %p
888 %2 = add <4 x i16> %1, %1
889 %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
890 %4 = fadd <2 x float> %3, %3
891 store <2 x float> %4, ptr %q
; Caller test, <8 x i8> argument / <2 x float> result: loads an <8 x i8> from
; %p, adds it to itself, passes it to @test_v2f32_v8i8_helper, then adds the
; returned <2 x float> to itself (fadd) and stores it to %q.
; Checked code: the vadd.i8 before the bl is bracketed by vrev64.8 and the
; vadd.f32 after it by vrev64.32. The SOFT variant passes and receives the
; value through r1/r0 (vmov), the HARD variant through d0; both end with a
; vstr to [r4].
895 declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
896 define void @test_v2f32_v8i8(ptr %p, ptr %q) {
897 ; SOFT-LABEL: test_v2f32_v8i8:
899 ; SOFT-NEXT: .save {r4, lr}
900 ; SOFT-NEXT: push {r4, lr}
901 ; SOFT-NEXT: vldr d16, [r0]
902 ; SOFT-NEXT: mov r4, r1
903 ; SOFT-NEXT: vrev64.8 d16, d16
904 ; SOFT-NEXT: vadd.i8 d16, d16, d16
905 ; SOFT-NEXT: vrev64.8 d16, d16
906 ; SOFT-NEXT: vmov r1, r0, d16
907 ; SOFT-NEXT: bl test_v2f32_v8i8_helper
908 ; SOFT-NEXT: vmov d16, r1, r0
909 ; SOFT-NEXT: vrev64.32 d16, d16
910 ; SOFT-NEXT: vadd.f32 d16, d16, d16
911 ; SOFT-NEXT: vrev64.32 d16, d16
912 ; SOFT-NEXT: vstr d16, [r4]
913 ; SOFT-NEXT: pop {r4, pc}
915 ; HARD-LABEL: test_v2f32_v8i8:
917 ; HARD-NEXT: .save {r4, lr}
918 ; HARD-NEXT: push {r4, lr}
919 ; HARD-NEXT: vldr d16, [r0]
920 ; HARD-NEXT: mov r4, r1
921 ; HARD-NEXT: vrev64.8 d16, d16
922 ; HARD-NEXT: vadd.i8 d16, d16, d16
923 ; HARD-NEXT: vrev64.8 d0, d16
924 ; HARD-NEXT: bl test_v2f32_v8i8_helper
925 ; HARD-NEXT: vrev64.32 d16, d0
926 ; HARD-NEXT: vadd.f32 d16, d16, d16
927 ; HARD-NEXT: vrev64.32 d16, d16
928 ; HARD-NEXT: vstr d16, [r4]
929 ; HARD-NEXT: pop {r4, pc}
930 %1 = load <8 x i8>, ptr %p
931 %2 = add <8 x i8> %1, %1
932 %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
933 %4 = fadd <2 x float> %3, %3
934 store <2 x float> %4, ptr %q
; Caller test, i64 argument / <2 x i32> result: loads an i64 from %p and calls
; @test_v2i32_i64_helper with %2; the returned <2 x i32> is added to itself
; and stored to %q.
; Checked code: ldrd into r0/r1 followed by adds/adc before the bl. After the
; call the SOFT variant rebuilds d16 from r1/r0 with vmov while the HARD
; variant reads d0; both then expect vrev64.32, vadd.i32, vrev64.32 and a
; vstr to [r4].
938 declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
939 define void @test_v2i32_i64(ptr %p, ptr %q) {
940 ; SOFT-LABEL: test_v2i32_i64:
942 ; SOFT-NEXT: .save {r4, lr}
943 ; SOFT-NEXT: push {r4, lr}
944 ; SOFT-NEXT: mov r4, r1
945 ; SOFT-NEXT: ldrd r0, r1, [r0]
946 ; SOFT-NEXT: adds r1, r1, r1
947 ; SOFT-NEXT: adc r0, r0, r0
948 ; SOFT-NEXT: bl test_v2i32_i64_helper
949 ; SOFT-NEXT: vmov d16, r1, r0
950 ; SOFT-NEXT: vrev64.32 d16, d16
951 ; SOFT-NEXT: vadd.i32 d16, d16, d16
952 ; SOFT-NEXT: vrev64.32 d16, d16
953 ; SOFT-NEXT: vstr d16, [r4]
954 ; SOFT-NEXT: pop {r4, pc}
956 ; HARD-LABEL: test_v2i32_i64:
958 ; HARD-NEXT: .save {r4, lr}
959 ; HARD-NEXT: push {r4, lr}
960 ; HARD-NEXT: mov r4, r1
961 ; HARD-NEXT: ldrd r0, r1, [r0]
962 ; HARD-NEXT: adds r1, r1, r1
963 ; HARD-NEXT: adc r0, r0, r0
964 ; HARD-NEXT: bl test_v2i32_i64_helper
965 ; HARD-NEXT: vrev64.32 d16, d0
966 ; HARD-NEXT: vadd.i32 d16, d16, d16
967 ; HARD-NEXT: vrev64.32 d16, d16
968 ; HARD-NEXT: vstr d16, [r4]
969 ; HARD-NEXT: pop {r4, pc}
970 %1 = load i64, ptr %p
972 %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
973 %4 = add <2 x i32> %3, %3
974 store <2 x i32> %4, ptr %q
; Caller test, double argument / <2 x i32> result: loads a double from %p,
; adds it to itself (fadd), passes it to @test_v2i32_f64_helper, then adds the
; returned <2 x i32> to itself and stores it to %q.
; Checked code: vadd.f64 before the bl (the SOFT variant also moves d16 into
; r1/r0, the HARD variant targets d0). After the call the SOFT variant
; rebuilds d16 from r1/r0 while the HARD variant reads d0; both then expect
; vrev64.32, vadd.i32, vrev64.32 and a vstr to [r4].
978 declare <2 x i32> @test_v2i32_f64_helper(double %p)
979 define void @test_v2i32_f64(ptr %p, ptr %q) {
980 ; SOFT-LABEL: test_v2i32_f64:
982 ; SOFT-NEXT: .save {r4, lr}
983 ; SOFT-NEXT: push {r4, lr}
984 ; SOFT-NEXT: vldr d16, [r0]
985 ; SOFT-NEXT: mov r4, r1
986 ; SOFT-NEXT: vadd.f64 d16, d16, d16
987 ; SOFT-NEXT: vmov r1, r0, d16
988 ; SOFT-NEXT: bl test_v2i32_f64_helper
989 ; SOFT-NEXT: vmov d16, r1, r0
990 ; SOFT-NEXT: vrev64.32 d16, d16
991 ; SOFT-NEXT: vadd.i32 d16, d16, d16
992 ; SOFT-NEXT: vrev64.32 d16, d16
993 ; SOFT-NEXT: vstr d16, [r4]
994 ; SOFT-NEXT: pop {r4, pc}
996 ; HARD-LABEL: test_v2i32_f64:
998 ; HARD-NEXT: .save {r4, lr}
999 ; HARD-NEXT: push {r4, lr}
1000 ; HARD-NEXT: vldr d16, [r0]
1001 ; HARD-NEXT: mov r4, r1
1002 ; HARD-NEXT: vadd.f64 d0, d16, d16
1003 ; HARD-NEXT: bl test_v2i32_f64_helper
1004 ; HARD-NEXT: vrev64.32 d16, d0
1005 ; HARD-NEXT: vadd.i32 d16, d16, d16
1006 ; HARD-NEXT: vrev64.32 d16, d16
1007 ; HARD-NEXT: vstr d16, [r4]
1008 ; HARD-NEXT: pop {r4, pc}
1009 %1 = load double, ptr %p
1010 %2 = fadd double %1, %1
1011 %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
1012 %4 = add <2 x i32> %3, %3
1013 store <2 x i32> %4, ptr %q
; Caller test, <1 x i64> argument / <2 x i32> result: loads a <1 x i64> from
; %p, adds it to itself, passes it to @test_v2i32_v1i64_helper, then adds the
; returned <2 x i32> to itself and stores it to %q.
; Checked code: vadd.i64 with no vrev64 before the bl. After the call the SOFT
; variant rebuilds d16 from r1/r0 with vmov while the HARD variant reads d0;
; both then expect vrev64.32, vadd.i32, vrev64.32 and a vstr to [r4].
1017 declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
1018 define void @test_v2i32_v1i64(ptr %p, ptr %q) {
1019 ; SOFT-LABEL: test_v2i32_v1i64:
1021 ; SOFT-NEXT: .save {r4, lr}
1022 ; SOFT-NEXT: push {r4, lr}
1023 ; SOFT-NEXT: vldr d16, [r0]
1024 ; SOFT-NEXT: mov r4, r1
1025 ; SOFT-NEXT: vadd.i64 d16, d16, d16
1026 ; SOFT-NEXT: vmov r1, r0, d16
1027 ; SOFT-NEXT: bl test_v2i32_v1i64_helper
1028 ; SOFT-NEXT: vmov d16, r1, r0
1029 ; SOFT-NEXT: vrev64.32 d16, d16
1030 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1031 ; SOFT-NEXT: vrev64.32 d16, d16
1032 ; SOFT-NEXT: vstr d16, [r4]
1033 ; SOFT-NEXT: pop {r4, pc}
1035 ; HARD-LABEL: test_v2i32_v1i64:
1037 ; HARD-NEXT: .save {r4, lr}
1038 ; HARD-NEXT: push {r4, lr}
1039 ; HARD-NEXT: vldr d16, [r0]
1040 ; HARD-NEXT: mov r4, r1
1041 ; HARD-NEXT: vadd.i64 d0, d16, d16
1042 ; HARD-NEXT: bl test_v2i32_v1i64_helper
1043 ; HARD-NEXT: vrev64.32 d16, d0
1044 ; HARD-NEXT: vadd.i32 d16, d16, d16
1045 ; HARD-NEXT: vrev64.32 d16, d16
1046 ; HARD-NEXT: vstr d16, [r4]
1047 ; HARD-NEXT: pop {r4, pc}
1048 %1 = load <1 x i64>, ptr %p
1049 %2 = add <1 x i64> %1, %1
1050 %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
1051 %4 = add <2 x i32> %3, %3
1052 store <2 x i32> %4, ptr %q
; Caller test, <2 x float> argument / <2 x i32> result: loads a <2 x float>
; from %p, adds it to itself (fadd), passes it to @test_v2i32_v2f32_helper,
; then adds the returned <2 x i32> to itself and stores it to %q.
; Checked code: the vadd.f32 before the bl and the vadd.i32 after it are each
; bracketed by vrev64.32. The SOFT variant passes and receives the value
; through r1/r0 (vmov), the HARD variant through d0; both end with a vstr to
; [r4].
1056 declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
1057 define void @test_v2i32_v2f32(ptr %p, ptr %q) {
1058 ; SOFT-LABEL: test_v2i32_v2f32:
1060 ; SOFT-NEXT: .save {r4, lr}
1061 ; SOFT-NEXT: push {r4, lr}
1062 ; SOFT-NEXT: vldr d16, [r0]
1063 ; SOFT-NEXT: mov r4, r1
1064 ; SOFT-NEXT: vrev64.32 d16, d16
1065 ; SOFT-NEXT: vadd.f32 d16, d16, d16
1066 ; SOFT-NEXT: vrev64.32 d16, d16
1067 ; SOFT-NEXT: vmov r1, r0, d16
1068 ; SOFT-NEXT: bl test_v2i32_v2f32_helper
1069 ; SOFT-NEXT: vmov d16, r1, r0
1070 ; SOFT-NEXT: vrev64.32 d16, d16
1071 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1072 ; SOFT-NEXT: vrev64.32 d16, d16
1073 ; SOFT-NEXT: vstr d16, [r4]
1074 ; SOFT-NEXT: pop {r4, pc}
1076 ; HARD-LABEL: test_v2i32_v2f32:
1078 ; HARD-NEXT: .save {r4, lr}
1079 ; HARD-NEXT: push {r4, lr}
1080 ; HARD-NEXT: vldr d16, [r0]
1081 ; HARD-NEXT: mov r4, r1
1082 ; HARD-NEXT: vrev64.32 d16, d16
1083 ; HARD-NEXT: vadd.f32 d16, d16, d16
1084 ; HARD-NEXT: vrev64.32 d0, d16
1085 ; HARD-NEXT: bl test_v2i32_v2f32_helper
1086 ; HARD-NEXT: vrev64.32 d16, d0
1087 ; HARD-NEXT: vadd.i32 d16, d16, d16
1088 ; HARD-NEXT: vrev64.32 d16, d16
1089 ; HARD-NEXT: vstr d16, [r4]
1090 ; HARD-NEXT: pop {r4, pc}
1091 %1 = load <2 x float>, ptr %p
1092 %2 = fadd <2 x float> %1, %1
1093 %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
1094 %4 = add <2 x i32> %3, %3
1095 store <2 x i32> %4, ptr %q
; test_v2i32_v4i16 loads a <4 x i16> from %p, adds it to itself, passes it to
; the external test_v2i32_v4i16_helper, adds the returned <2 x i32> to itself
; and stores it through %q.
; In the expected code the vadd.i16 on the argument is bracketed by vrev64.16
; and the vadd.i32 on the result by vrev64.32.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1099 declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
1100 define void @test_v2i32_v4i16(ptr %p, ptr %q) {
1101 ; SOFT-LABEL: test_v2i32_v4i16:
1103 ; SOFT-NEXT: .save {r4, lr}
1104 ; SOFT-NEXT: push {r4, lr}
1105 ; SOFT-NEXT: vldr d16, [r0]
1106 ; SOFT-NEXT: mov r4, r1
1107 ; SOFT-NEXT: vrev64.16 d16, d16
1108 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1109 ; SOFT-NEXT: vrev64.16 d16, d16
1110 ; SOFT-NEXT: vmov r1, r0, d16
1111 ; SOFT-NEXT: bl test_v2i32_v4i16_helper
1112 ; SOFT-NEXT: vmov d16, r1, r0
1113 ; SOFT-NEXT: vrev64.32 d16, d16
1114 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1115 ; SOFT-NEXT: vrev64.32 d16, d16
1116 ; SOFT-NEXT: vstr d16, [r4]
1117 ; SOFT-NEXT: pop {r4, pc}
1119 ; HARD-LABEL: test_v2i32_v4i16:
1121 ; HARD-NEXT: .save {r4, lr}
1122 ; HARD-NEXT: push {r4, lr}
1123 ; HARD-NEXT: vldr d16, [r0]
1124 ; HARD-NEXT: mov r4, r1
1125 ; HARD-NEXT: vrev64.16 d16, d16
1126 ; HARD-NEXT: vadd.i16 d16, d16, d16
1127 ; HARD-NEXT: vrev64.16 d0, d16
1128 ; HARD-NEXT: bl test_v2i32_v4i16_helper
1129 ; HARD-NEXT: vrev64.32 d16, d0
1130 ; HARD-NEXT: vadd.i32 d16, d16, d16
1131 ; HARD-NEXT: vrev64.32 d16, d16
1132 ; HARD-NEXT: vstr d16, [r4]
1133 ; HARD-NEXT: pop {r4, pc}
1134 %1 = load <4 x i16>, ptr %p
1135 %2 = add <4 x i16> %1, %1
1136 %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
1137 %4 = add <2 x i32> %3, %3
1138 store <2 x i32> %4, ptr %q
; test_v2i32_v8i8 loads an <8 x i8> from %p, adds it to itself, passes it to
; the external test_v2i32_v8i8_helper, adds the returned <2 x i32> to itself
; and stores it through %q.
; In the expected code the vadd.i8 on the argument is bracketed by vrev64.8
; and the vadd.i32 on the result by vrev64.32.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1142 declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
1143 define void @test_v2i32_v8i8(ptr %p, ptr %q) {
1144 ; SOFT-LABEL: test_v2i32_v8i8:
1146 ; SOFT-NEXT: .save {r4, lr}
1147 ; SOFT-NEXT: push {r4, lr}
1148 ; SOFT-NEXT: vldr d16, [r0]
1149 ; SOFT-NEXT: mov r4, r1
1150 ; SOFT-NEXT: vrev64.8 d16, d16
1151 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1152 ; SOFT-NEXT: vrev64.8 d16, d16
1153 ; SOFT-NEXT: vmov r1, r0, d16
1154 ; SOFT-NEXT: bl test_v2i32_v8i8_helper
1155 ; SOFT-NEXT: vmov d16, r1, r0
1156 ; SOFT-NEXT: vrev64.32 d16, d16
1157 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1158 ; SOFT-NEXT: vrev64.32 d16, d16
1159 ; SOFT-NEXT: vstr d16, [r4]
1160 ; SOFT-NEXT: pop {r4, pc}
1162 ; HARD-LABEL: test_v2i32_v8i8:
1164 ; HARD-NEXT: .save {r4, lr}
1165 ; HARD-NEXT: push {r4, lr}
1166 ; HARD-NEXT: vldr d16, [r0]
1167 ; HARD-NEXT: mov r4, r1
1168 ; HARD-NEXT: vrev64.8 d16, d16
1169 ; HARD-NEXT: vadd.i8 d16, d16, d16
1170 ; HARD-NEXT: vrev64.8 d0, d16
1171 ; HARD-NEXT: bl test_v2i32_v8i8_helper
1172 ; HARD-NEXT: vrev64.32 d16, d0
1173 ; HARD-NEXT: vadd.i32 d16, d16, d16
1174 ; HARD-NEXT: vrev64.32 d16, d16
1175 ; HARD-NEXT: vstr d16, [r4]
1176 ; HARD-NEXT: pop {r4, pc}
1177 %1 = load <8 x i8>, ptr %p
1178 %2 = add <8 x i8> %1, %1
1179 %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
1180 %4 = add <2 x i32> %3, %3
1181 store <2 x i32> %4, ptr %q
; test_v4i16_i64 loads an i64 from %p, adds it to itself, passes it to the
; external test_v4i16_i64_helper, adds the returned <4 x i16> to itself and
; stores it through %q.
; In the expected code the i64 add is the adds/adc pair on r1/r0 ahead of the
; call, and the vadd.i16 on the result is bracketed by vrev64.16.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1185 declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
1186 define void @test_v4i16_i64(ptr %p, ptr %q) {
1187 ; SOFT-LABEL: test_v4i16_i64:
1189 ; SOFT-NEXT: .save {r4, lr}
1190 ; SOFT-NEXT: push {r4, lr}
1191 ; SOFT-NEXT: mov r4, r1
1192 ; SOFT-NEXT: ldrd r0, r1, [r0]
1193 ; SOFT-NEXT: adds r1, r1, r1
1194 ; SOFT-NEXT: adc r0, r0, r0
1195 ; SOFT-NEXT: bl test_v4i16_i64_helper
1196 ; SOFT-NEXT: vmov d16, r1, r0
1197 ; SOFT-NEXT: vrev64.16 d16, d16
1198 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1199 ; SOFT-NEXT: vrev64.16 d16, d16
1200 ; SOFT-NEXT: vstr d16, [r4]
1201 ; SOFT-NEXT: pop {r4, pc}
1203 ; HARD-LABEL: test_v4i16_i64:
1205 ; HARD-NEXT: .save {r4, lr}
1206 ; HARD-NEXT: push {r4, lr}
1207 ; HARD-NEXT: mov r4, r1
1208 ; HARD-NEXT: ldrd r0, r1, [r0]
1209 ; HARD-NEXT: adds r1, r1, r1
1210 ; HARD-NEXT: adc r0, r0, r0
1211 ; HARD-NEXT: bl test_v4i16_i64_helper
1212 ; HARD-NEXT: vrev64.16 d16, d0
1213 ; HARD-NEXT: vadd.i16 d16, d16, d16
1214 ; HARD-NEXT: vrev64.16 d16, d16
1215 ; HARD-NEXT: vstr d16, [r4]
1216 ; HARD-NEXT: pop {r4, pc}
1217 %1 = load i64, ptr %p
; Defines %2, the doubled input that the call consumes; this is the 64-bit add
; that the adds/adc pair in the expected output corresponds to.
1218 %2 = add i64 %1, %1
1219 %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
1220 %4 = add <4 x i16> %3, %3
1221 store <4 x i16> %4, ptr %q
; test_v4i16_f64 loads a double from %p, adds it to itself, passes it to the
; external test_v4i16_f64_helper, adds the returned <4 x i16> to itself and
; stores it through %q.
; In the expected code only the vadd.i16 on the result is bracketed by
; vrev64.16; the vadd.f64 on the argument has no lane reversal around it.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1225 declare <4 x i16> @test_v4i16_f64_helper(double %p)
1226 define void @test_v4i16_f64(ptr %p, ptr %q) {
1227 ; SOFT-LABEL: test_v4i16_f64:
1229 ; SOFT-NEXT: .save {r4, lr}
1230 ; SOFT-NEXT: push {r4, lr}
1231 ; SOFT-NEXT: vldr d16, [r0]
1232 ; SOFT-NEXT: mov r4, r1
1233 ; SOFT-NEXT: vadd.f64 d16, d16, d16
1234 ; SOFT-NEXT: vmov r1, r0, d16
1235 ; SOFT-NEXT: bl test_v4i16_f64_helper
1236 ; SOFT-NEXT: vmov d16, r1, r0
1237 ; SOFT-NEXT: vrev64.16 d16, d16
1238 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1239 ; SOFT-NEXT: vrev64.16 d16, d16
1240 ; SOFT-NEXT: vstr d16, [r4]
1241 ; SOFT-NEXT: pop {r4, pc}
1243 ; HARD-LABEL: test_v4i16_f64:
1245 ; HARD-NEXT: .save {r4, lr}
1246 ; HARD-NEXT: push {r4, lr}
1247 ; HARD-NEXT: vldr d16, [r0]
1248 ; HARD-NEXT: mov r4, r1
1249 ; HARD-NEXT: vadd.f64 d0, d16, d16
1250 ; HARD-NEXT: bl test_v4i16_f64_helper
1251 ; HARD-NEXT: vrev64.16 d16, d0
1252 ; HARD-NEXT: vadd.i16 d16, d16, d16
1253 ; HARD-NEXT: vrev64.16 d16, d16
1254 ; HARD-NEXT: vstr d16, [r4]
1255 ; HARD-NEXT: pop {r4, pc}
1256 %1 = load double, ptr %p
1257 %2 = fadd double %1, %1
1258 %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
1259 %4 = add <4 x i16> %3, %3
1260 store <4 x i16> %4, ptr %q
; test_v4i16_v1i64 loads a <1 x i64> from %p, adds it to itself, passes it to
; the external test_v4i16_v1i64_helper, adds the returned <4 x i16> to itself
; and stores it through %q.
; In the expected code only the vadd.i16 on the result is bracketed by
; vrev64.16; the vadd.i64 on the argument has no lane reversal around it.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1264 declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
1265 define void @test_v4i16_v1i64(ptr %p, ptr %q) {
1266 ; SOFT-LABEL: test_v4i16_v1i64:
1268 ; SOFT-NEXT: .save {r4, lr}
1269 ; SOFT-NEXT: push {r4, lr}
1270 ; SOFT-NEXT: vldr d16, [r0]
1271 ; SOFT-NEXT: mov r4, r1
1272 ; SOFT-NEXT: vadd.i64 d16, d16, d16
1273 ; SOFT-NEXT: vmov r1, r0, d16
1274 ; SOFT-NEXT: bl test_v4i16_v1i64_helper
1275 ; SOFT-NEXT: vmov d16, r1, r0
1276 ; SOFT-NEXT: vrev64.16 d16, d16
1277 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1278 ; SOFT-NEXT: vrev64.16 d16, d16
1279 ; SOFT-NEXT: vstr d16, [r4]
1280 ; SOFT-NEXT: pop {r4, pc}
1282 ; HARD-LABEL: test_v4i16_v1i64:
1284 ; HARD-NEXT: .save {r4, lr}
1285 ; HARD-NEXT: push {r4, lr}
1286 ; HARD-NEXT: vldr d16, [r0]
1287 ; HARD-NEXT: mov r4, r1
1288 ; HARD-NEXT: vadd.i64 d0, d16, d16
1289 ; HARD-NEXT: bl test_v4i16_v1i64_helper
1290 ; HARD-NEXT: vrev64.16 d16, d0
1291 ; HARD-NEXT: vadd.i16 d16, d16, d16
1292 ; HARD-NEXT: vrev64.16 d16, d16
1293 ; HARD-NEXT: vstr d16, [r4]
1294 ; HARD-NEXT: pop {r4, pc}
1295 %1 = load <1 x i64>, ptr %p
1296 %2 = add <1 x i64> %1, %1
1297 %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
1298 %4 = add <4 x i16> %3, %3
1299 store <4 x i16> %4, ptr %q
; test_v4i16_v2f32 loads a <2 x float> from %p, adds it to itself, passes it to
; the external test_v4i16_v2f32_helper, adds the returned <4 x i16> to itself
; and stores it through %q.
; In the expected code the vadd.f32 on the argument is bracketed by vrev64.32
; and the vadd.i16 on the result by vrev64.16.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1303 declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
1304 define void @test_v4i16_v2f32(ptr %p, ptr %q) {
1305 ; SOFT-LABEL: test_v4i16_v2f32:
1307 ; SOFT-NEXT: .save {r4, lr}
1308 ; SOFT-NEXT: push {r4, lr}
1309 ; SOFT-NEXT: vldr d16, [r0]
1310 ; SOFT-NEXT: mov r4, r1
1311 ; SOFT-NEXT: vrev64.32 d16, d16
1312 ; SOFT-NEXT: vadd.f32 d16, d16, d16
1313 ; SOFT-NEXT: vrev64.32 d16, d16
1314 ; SOFT-NEXT: vmov r1, r0, d16
1315 ; SOFT-NEXT: bl test_v4i16_v2f32_helper
1316 ; SOFT-NEXT: vmov d16, r1, r0
1317 ; SOFT-NEXT: vrev64.16 d16, d16
1318 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1319 ; SOFT-NEXT: vrev64.16 d16, d16
1320 ; SOFT-NEXT: vstr d16, [r4]
1321 ; SOFT-NEXT: pop {r4, pc}
1323 ; HARD-LABEL: test_v4i16_v2f32:
1325 ; HARD-NEXT: .save {r4, lr}
1326 ; HARD-NEXT: push {r4, lr}
1327 ; HARD-NEXT: vldr d16, [r0]
1328 ; HARD-NEXT: mov r4, r1
1329 ; HARD-NEXT: vrev64.32 d16, d16
1330 ; HARD-NEXT: vadd.f32 d16, d16, d16
1331 ; HARD-NEXT: vrev64.32 d0, d16
1332 ; HARD-NEXT: bl test_v4i16_v2f32_helper
1333 ; HARD-NEXT: vrev64.16 d16, d0
1334 ; HARD-NEXT: vadd.i16 d16, d16, d16
1335 ; HARD-NEXT: vrev64.16 d16, d16
1336 ; HARD-NEXT: vstr d16, [r4]
1337 ; HARD-NEXT: pop {r4, pc}
1338 %1 = load <2 x float>, ptr %p
1339 %2 = fadd <2 x float> %1, %1
1340 %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
1341 %4 = add <4 x i16> %3, %3
1342 store <4 x i16> %4, ptr %q
; test_v4i16_v2i32 loads a <2 x i32> from %p, adds it to itself, passes it to
; the external test_v4i16_v2i32_helper, adds the returned <4 x i16> to itself
; and stores it through %q.
; In the expected code the vadd.i32 on the argument is bracketed by vrev64.32
; and the vadd.i16 on the result by vrev64.16.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1346 declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
1347 define void @test_v4i16_v2i32(ptr %p, ptr %q) {
1348 ; SOFT-LABEL: test_v4i16_v2i32:
1350 ; SOFT-NEXT: .save {r4, lr}
1351 ; SOFT-NEXT: push {r4, lr}
1352 ; SOFT-NEXT: vldr d16, [r0]
1353 ; SOFT-NEXT: mov r4, r1
1354 ; SOFT-NEXT: vrev64.32 d16, d16
1355 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1356 ; SOFT-NEXT: vrev64.32 d16, d16
1357 ; SOFT-NEXT: vmov r1, r0, d16
1358 ; SOFT-NEXT: bl test_v4i16_v2i32_helper
1359 ; SOFT-NEXT: vmov d16, r1, r0
1360 ; SOFT-NEXT: vrev64.16 d16, d16
1361 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1362 ; SOFT-NEXT: vrev64.16 d16, d16
1363 ; SOFT-NEXT: vstr d16, [r4]
1364 ; SOFT-NEXT: pop {r4, pc}
1366 ; HARD-LABEL: test_v4i16_v2i32:
1368 ; HARD-NEXT: .save {r4, lr}
1369 ; HARD-NEXT: push {r4, lr}
1370 ; HARD-NEXT: vldr d16, [r0]
1371 ; HARD-NEXT: mov r4, r1
1372 ; HARD-NEXT: vrev64.32 d16, d16
1373 ; HARD-NEXT: vadd.i32 d16, d16, d16
1374 ; HARD-NEXT: vrev64.32 d0, d16
1375 ; HARD-NEXT: bl test_v4i16_v2i32_helper
1376 ; HARD-NEXT: vrev64.16 d16, d0
1377 ; HARD-NEXT: vadd.i16 d16, d16, d16
1378 ; HARD-NEXT: vrev64.16 d16, d16
1379 ; HARD-NEXT: vstr d16, [r4]
1380 ; HARD-NEXT: pop {r4, pc}
1381 %1 = load <2 x i32>, ptr %p
1382 %2 = add <2 x i32> %1, %1
1383 %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
1384 %4 = add <4 x i16> %3, %3
1385 store <4 x i16> %4, ptr %q
; test_v4i16_v8i8 loads an <8 x i8> from %p, adds it to itself, passes it to
; the external test_v4i16_v8i8_helper, adds the returned <4 x i16> to itself
; and stores it through %q.
; In the expected code the vadd.i8 on the argument is bracketed by vrev64.8
; and the vadd.i16 on the result by vrev64.16.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1389 declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
1390 define void @test_v4i16_v8i8(ptr %p, ptr %q) {
1391 ; SOFT-LABEL: test_v4i16_v8i8:
1393 ; SOFT-NEXT: .save {r4, lr}
1394 ; SOFT-NEXT: push {r4, lr}
1395 ; SOFT-NEXT: vldr d16, [r0]
1396 ; SOFT-NEXT: mov r4, r1
1397 ; SOFT-NEXT: vrev64.8 d16, d16
1398 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1399 ; SOFT-NEXT: vrev64.8 d16, d16
1400 ; SOFT-NEXT: vmov r1, r0, d16
1401 ; SOFT-NEXT: bl test_v4i16_v8i8_helper
1402 ; SOFT-NEXT: vmov d16, r1, r0
1403 ; SOFT-NEXT: vrev64.16 d16, d16
1404 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1405 ; SOFT-NEXT: vrev64.16 d16, d16
1406 ; SOFT-NEXT: vstr d16, [r4]
1407 ; SOFT-NEXT: pop {r4, pc}
1409 ; HARD-LABEL: test_v4i16_v8i8:
1411 ; HARD-NEXT: .save {r4, lr}
1412 ; HARD-NEXT: push {r4, lr}
1413 ; HARD-NEXT: vldr d16, [r0]
1414 ; HARD-NEXT: mov r4, r1
1415 ; HARD-NEXT: vrev64.8 d16, d16
1416 ; HARD-NEXT: vadd.i8 d16, d16, d16
1417 ; HARD-NEXT: vrev64.8 d0, d16
1418 ; HARD-NEXT: bl test_v4i16_v8i8_helper
1419 ; HARD-NEXT: vrev64.16 d16, d0
1420 ; HARD-NEXT: vadd.i16 d16, d16, d16
1421 ; HARD-NEXT: vrev64.16 d16, d16
1422 ; HARD-NEXT: vstr d16, [r4]
1423 ; HARD-NEXT: pop {r4, pc}
1424 %1 = load <8 x i8>, ptr %p
1425 %2 = add <8 x i8> %1, %1
1426 %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
1427 %4 = add <4 x i16> %3, %3
1428 store <4 x i16> %4, ptr %q
; test_v8i8_i64 loads an i64 from %p, adds it to itself, passes it to the
; external test_v8i8_i64_helper, adds the returned <8 x i8> to itself and
; stores it through %q.
; In the expected code the i64 add is the adds/adc pair on r1/r0 ahead of the
; call, and the vadd.i8 on the result is bracketed by vrev64.8.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1432 declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
1433 define void @test_v8i8_i64(ptr %p, ptr %q) {
1434 ; SOFT-LABEL: test_v8i8_i64:
1436 ; SOFT-NEXT: .save {r4, lr}
1437 ; SOFT-NEXT: push {r4, lr}
1438 ; SOFT-NEXT: mov r4, r1
1439 ; SOFT-NEXT: ldrd r0, r1, [r0]
1440 ; SOFT-NEXT: adds r1, r1, r1
1441 ; SOFT-NEXT: adc r0, r0, r0
1442 ; SOFT-NEXT: bl test_v8i8_i64_helper
1443 ; SOFT-NEXT: vmov d16, r1, r0
1444 ; SOFT-NEXT: vrev64.8 d16, d16
1445 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1446 ; SOFT-NEXT: vrev64.8 d16, d16
1447 ; SOFT-NEXT: vstr d16, [r4]
1448 ; SOFT-NEXT: pop {r4, pc}
1450 ; HARD-LABEL: test_v8i8_i64:
1452 ; HARD-NEXT: .save {r4, lr}
1453 ; HARD-NEXT: push {r4, lr}
1454 ; HARD-NEXT: mov r4, r1
1455 ; HARD-NEXT: ldrd r0, r1, [r0]
1456 ; HARD-NEXT: adds r1, r1, r1
1457 ; HARD-NEXT: adc r0, r0, r0
1458 ; HARD-NEXT: bl test_v8i8_i64_helper
1459 ; HARD-NEXT: vrev64.8 d16, d0
1460 ; HARD-NEXT: vadd.i8 d16, d16, d16
1461 ; HARD-NEXT: vrev64.8 d16, d16
1462 ; HARD-NEXT: vstr d16, [r4]
1463 ; HARD-NEXT: pop {r4, pc}
1464 %1 = load i64, ptr %p
; Defines %2, the doubled input that the call consumes; this is the 64-bit add
; that the adds/adc pair in the expected output corresponds to.
1465 %2 = add i64 %1, %1
1466 %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
1467 %4 = add <8 x i8> %3, %3
1468 store <8 x i8> %4, ptr %q
; test_v8i8_f64 loads a double from %p, adds it to itself, passes it to the
; external test_v8i8_f64_helper, adds the returned <8 x i8> to itself and
; stores it through %q.
; In the expected code only the vadd.i8 on the result is bracketed by
; vrev64.8; the vadd.f64 on the argument has no lane reversal around it.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1472 declare <8 x i8> @test_v8i8_f64_helper(double %p)
1473 define void @test_v8i8_f64(ptr %p, ptr %q) {
1474 ; SOFT-LABEL: test_v8i8_f64:
1476 ; SOFT-NEXT: .save {r4, lr}
1477 ; SOFT-NEXT: push {r4, lr}
1478 ; SOFT-NEXT: vldr d16, [r0]
1479 ; SOFT-NEXT: mov r4, r1
1480 ; SOFT-NEXT: vadd.f64 d16, d16, d16
1481 ; SOFT-NEXT: vmov r1, r0, d16
1482 ; SOFT-NEXT: bl test_v8i8_f64_helper
1483 ; SOFT-NEXT: vmov d16, r1, r0
1484 ; SOFT-NEXT: vrev64.8 d16, d16
1485 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1486 ; SOFT-NEXT: vrev64.8 d16, d16
1487 ; SOFT-NEXT: vstr d16, [r4]
1488 ; SOFT-NEXT: pop {r4, pc}
1490 ; HARD-LABEL: test_v8i8_f64:
1492 ; HARD-NEXT: .save {r4, lr}
1493 ; HARD-NEXT: push {r4, lr}
1494 ; HARD-NEXT: vldr d16, [r0]
1495 ; HARD-NEXT: mov r4, r1
1496 ; HARD-NEXT: vadd.f64 d0, d16, d16
1497 ; HARD-NEXT: bl test_v8i8_f64_helper
1498 ; HARD-NEXT: vrev64.8 d16, d0
1499 ; HARD-NEXT: vadd.i8 d16, d16, d16
1500 ; HARD-NEXT: vrev64.8 d16, d16
1501 ; HARD-NEXT: vstr d16, [r4]
1502 ; HARD-NEXT: pop {r4, pc}
1503 %1 = load double, ptr %p
1504 %2 = fadd double %1, %1
1505 %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
1506 %4 = add <8 x i8> %3, %3
1507 store <8 x i8> %4, ptr %q
; test_v8i8_v1i64 loads a <1 x i64> from %p, adds it to itself, passes it to
; the external test_v8i8_v1i64_helper, adds the returned <8 x i8> to itself
; and stores it through %q.
; In the expected code only the vadd.i8 on the result is bracketed by
; vrev64.8; the vadd.i64 on the argument has no lane reversal around it.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1511 declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
1512 define void @test_v8i8_v1i64(ptr %p, ptr %q) {
1513 ; SOFT-LABEL: test_v8i8_v1i64:
1515 ; SOFT-NEXT: .save {r4, lr}
1516 ; SOFT-NEXT: push {r4, lr}
1517 ; SOFT-NEXT: vldr d16, [r0]
1518 ; SOFT-NEXT: mov r4, r1
1519 ; SOFT-NEXT: vadd.i64 d16, d16, d16
1520 ; SOFT-NEXT: vmov r1, r0, d16
1521 ; SOFT-NEXT: bl test_v8i8_v1i64_helper
1522 ; SOFT-NEXT: vmov d16, r1, r0
1523 ; SOFT-NEXT: vrev64.8 d16, d16
1524 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1525 ; SOFT-NEXT: vrev64.8 d16, d16
1526 ; SOFT-NEXT: vstr d16, [r4]
1527 ; SOFT-NEXT: pop {r4, pc}
1529 ; HARD-LABEL: test_v8i8_v1i64:
1531 ; HARD-NEXT: .save {r4, lr}
1532 ; HARD-NEXT: push {r4, lr}
1533 ; HARD-NEXT: vldr d16, [r0]
1534 ; HARD-NEXT: mov r4, r1
1535 ; HARD-NEXT: vadd.i64 d0, d16, d16
1536 ; HARD-NEXT: bl test_v8i8_v1i64_helper
1537 ; HARD-NEXT: vrev64.8 d16, d0
1538 ; HARD-NEXT: vadd.i8 d16, d16, d16
1539 ; HARD-NEXT: vrev64.8 d16, d16
1540 ; HARD-NEXT: vstr d16, [r4]
1541 ; HARD-NEXT: pop {r4, pc}
1542 %1 = load <1 x i64>, ptr %p
1543 %2 = add <1 x i64> %1, %1
1544 %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
1545 %4 = add <8 x i8> %3, %3
1546 store <8 x i8> %4, ptr %q
; test_v8i8_v2f32 loads a <2 x float> from %p, adds it to itself, passes it to
; the external test_v8i8_v2f32_helper, adds the returned <8 x i8> to itself
; and stores it through %q.
; In the expected code the vadd.f32 on the argument is bracketed by vrev64.32
; and the vadd.i8 on the result by vrev64.8.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1550 declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
1551 define void @test_v8i8_v2f32(ptr %p, ptr %q) {
1552 ; SOFT-LABEL: test_v8i8_v2f32:
1554 ; SOFT-NEXT: .save {r4, lr}
1555 ; SOFT-NEXT: push {r4, lr}
1556 ; SOFT-NEXT: vldr d16, [r0]
1557 ; SOFT-NEXT: mov r4, r1
1558 ; SOFT-NEXT: vrev64.32 d16, d16
1559 ; SOFT-NEXT: vadd.f32 d16, d16, d16
1560 ; SOFT-NEXT: vrev64.32 d16, d16
1561 ; SOFT-NEXT: vmov r1, r0, d16
1562 ; SOFT-NEXT: bl test_v8i8_v2f32_helper
1563 ; SOFT-NEXT: vmov d16, r1, r0
1564 ; SOFT-NEXT: vrev64.8 d16, d16
1565 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1566 ; SOFT-NEXT: vrev64.8 d16, d16
1567 ; SOFT-NEXT: vstr d16, [r4]
1568 ; SOFT-NEXT: pop {r4, pc}
1570 ; HARD-LABEL: test_v8i8_v2f32:
1572 ; HARD-NEXT: .save {r4, lr}
1573 ; HARD-NEXT: push {r4, lr}
1574 ; HARD-NEXT: vldr d16, [r0]
1575 ; HARD-NEXT: mov r4, r1
1576 ; HARD-NEXT: vrev64.32 d16, d16
1577 ; HARD-NEXT: vadd.f32 d16, d16, d16
1578 ; HARD-NEXT: vrev64.32 d0, d16
1579 ; HARD-NEXT: bl test_v8i8_v2f32_helper
1580 ; HARD-NEXT: vrev64.8 d16, d0
1581 ; HARD-NEXT: vadd.i8 d16, d16, d16
1582 ; HARD-NEXT: vrev64.8 d16, d16
1583 ; HARD-NEXT: vstr d16, [r4]
1584 ; HARD-NEXT: pop {r4, pc}
1585 %1 = load <2 x float>, ptr %p
1586 %2 = fadd <2 x float> %1, %1
1587 %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
1588 %4 = add <8 x i8> %3, %3
1589 store <8 x i8> %4, ptr %q
; test_v8i8_v2i32 loads a <2 x i32> from %p, adds it to itself, passes it to
; the external test_v8i8_v2i32_helper, adds the returned <8 x i8> to itself
; and stores it through %q.
; In the expected code the vadd.i32 on the argument is bracketed by vrev64.32
; and the vadd.i8 on the result by vrev64.8.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1593 declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
1594 define void @test_v8i8_v2i32(ptr %p, ptr %q) {
1595 ; SOFT-LABEL: test_v8i8_v2i32:
1597 ; SOFT-NEXT: .save {r4, lr}
1598 ; SOFT-NEXT: push {r4, lr}
1599 ; SOFT-NEXT: vldr d16, [r0]
1600 ; SOFT-NEXT: mov r4, r1
1601 ; SOFT-NEXT: vrev64.32 d16, d16
1602 ; SOFT-NEXT: vadd.i32 d16, d16, d16
1603 ; SOFT-NEXT: vrev64.32 d16, d16
1604 ; SOFT-NEXT: vmov r1, r0, d16
1605 ; SOFT-NEXT: bl test_v8i8_v2i32_helper
1606 ; SOFT-NEXT: vmov d16, r1, r0
1607 ; SOFT-NEXT: vrev64.8 d16, d16
1608 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1609 ; SOFT-NEXT: vrev64.8 d16, d16
1610 ; SOFT-NEXT: vstr d16, [r4]
1611 ; SOFT-NEXT: pop {r4, pc}
1613 ; HARD-LABEL: test_v8i8_v2i32:
1615 ; HARD-NEXT: .save {r4, lr}
1616 ; HARD-NEXT: push {r4, lr}
1617 ; HARD-NEXT: vldr d16, [r0]
1618 ; HARD-NEXT: mov r4, r1
1619 ; HARD-NEXT: vrev64.32 d16, d16
1620 ; HARD-NEXT: vadd.i32 d16, d16, d16
1621 ; HARD-NEXT: vrev64.32 d0, d16
1622 ; HARD-NEXT: bl test_v8i8_v2i32_helper
1623 ; HARD-NEXT: vrev64.8 d16, d0
1624 ; HARD-NEXT: vadd.i8 d16, d16, d16
1625 ; HARD-NEXT: vrev64.8 d16, d16
1626 ; HARD-NEXT: vstr d16, [r4]
1627 ; HARD-NEXT: pop {r4, pc}
1628 %1 = load <2 x i32>, ptr %p
1629 %2 = add <2 x i32> %1, %1
1630 %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
1631 %4 = add <8 x i8> %3, %3
1632 store <8 x i8> %4, ptr %q
; test_v8i8_v4i16 loads a <4 x i16> from %p, adds it to itself, passes it to
; the external test_v8i8_v4i16_helper, adds the returned <8 x i8> to itself
; and stores it through %q.
; In the expected code the vadd.i16 on the argument is bracketed by vrev64.16
; and the vadd.i8 on the result by vrev64.8.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1636 declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
1637 define void @test_v8i8_v4i16(ptr %p, ptr %q) {
1638 ; SOFT-LABEL: test_v8i8_v4i16:
1640 ; SOFT-NEXT: .save {r4, lr}
1641 ; SOFT-NEXT: push {r4, lr}
1642 ; SOFT-NEXT: vldr d16, [r0]
1643 ; SOFT-NEXT: mov r4, r1
1644 ; SOFT-NEXT: vrev64.16 d16, d16
1645 ; SOFT-NEXT: vadd.i16 d16, d16, d16
1646 ; SOFT-NEXT: vrev64.16 d16, d16
1647 ; SOFT-NEXT: vmov r1, r0, d16
1648 ; SOFT-NEXT: bl test_v8i8_v4i16_helper
1649 ; SOFT-NEXT: vmov d16, r1, r0
1650 ; SOFT-NEXT: vrev64.8 d16, d16
1651 ; SOFT-NEXT: vadd.i8 d16, d16, d16
1652 ; SOFT-NEXT: vrev64.8 d16, d16
1653 ; SOFT-NEXT: vstr d16, [r4]
1654 ; SOFT-NEXT: pop {r4, pc}
1656 ; HARD-LABEL: test_v8i8_v4i16:
1658 ; HARD-NEXT: .save {r4, lr}
1659 ; HARD-NEXT: push {r4, lr}
1660 ; HARD-NEXT: vldr d16, [r0]
1661 ; HARD-NEXT: mov r4, r1
1662 ; HARD-NEXT: vrev64.16 d16, d16
1663 ; HARD-NEXT: vadd.i16 d16, d16, d16
1664 ; HARD-NEXT: vrev64.16 d0, d16
1665 ; HARD-NEXT: bl test_v8i8_v4i16_helper
1666 ; HARD-NEXT: vrev64.8 d16, d0
1667 ; HARD-NEXT: vadd.i8 d16, d16, d16
1668 ; HARD-NEXT: vrev64.8 d16, d16
1669 ; HARD-NEXT: vstr d16, [r4]
1670 ; HARD-NEXT: pop {r4, pc}
1671 %1 = load <4 x i16>, ptr %p
1672 %2 = add <4 x i16> %1, %1
1673 %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
1674 %4 = add <8 x i8> %3, %3
1675 store <8 x i8> %4, ptr %q
; test_f128_v2f64 loads a <2 x double> from %p, adds it to itself, passes it to
; the external test_f128_v2f64_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the argument add is two vadd.f64 instructions with no
; lane reversal, and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1679 declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
1680 define void @test_f128_v2f64(ptr %p, ptr %q) {
1681 ; SOFT-LABEL: test_f128_v2f64:
1683 ; SOFT-NEXT: .save {r4, lr}
1684 ; SOFT-NEXT: push {r4, lr}
1685 ; SOFT-NEXT: .pad #16
1686 ; SOFT-NEXT: sub sp, sp, #16
1687 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1688 ; SOFT-NEXT: mov r4, r1
1689 ; SOFT-NEXT: vadd.f64 d18, d16, d16
1690 ; SOFT-NEXT: vadd.f64 d16, d17, d17
1691 ; SOFT-NEXT: vmov r1, r0, d18
1692 ; SOFT-NEXT: vmov r3, r2, d16
1693 ; SOFT-NEXT: bl test_f128_v2f64_helper
1694 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1695 ; SOFT-NEXT: bl __addtf3
1696 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1697 ; SOFT-NEXT: add sp, sp, #16
1698 ; SOFT-NEXT: pop {r4, pc}
1700 ; HARD-LABEL: test_f128_v2f64:
1702 ; HARD-NEXT: .save {r4, lr}
1703 ; HARD-NEXT: push {r4, lr}
1704 ; HARD-NEXT: .pad #16
1705 ; HARD-NEXT: sub sp, sp, #16
1706 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1707 ; HARD-NEXT: mov r4, r1
1708 ; HARD-NEXT: vadd.f64 d1, d17, d17
1709 ; HARD-NEXT: vadd.f64 d0, d16, d16
1710 ; HARD-NEXT: bl test_f128_v2f64_helper
1711 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1712 ; HARD-NEXT: bl __addtf3
1713 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1714 ; HARD-NEXT: add sp, sp, #16
1715 ; HARD-NEXT: pop {r4, pc}
1716 %1 = load <2 x double>, ptr %p
1717 %2 = fadd <2 x double> %1, %1
1718 %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
1719 %4 = fadd fp128 %3, %3
1720 store fp128 %4, ptr %q
; test_f128_v2i64 loads a <2 x i64> from %p, adds it to itself, passes it to
; the external test_f128_v2i64_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the argument add is a single q-register vadd.i64 with
; no lane reversal, and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1724 declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
1725 define void @test_f128_v2i64(ptr %p, ptr %q) {
1726 ; SOFT-LABEL: test_f128_v2i64:
1728 ; SOFT-NEXT: .save {r4, lr}
1729 ; SOFT-NEXT: push {r4, lr}
1730 ; SOFT-NEXT: .pad #16
1731 ; SOFT-NEXT: sub sp, sp, #16
1732 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1733 ; SOFT-NEXT: mov r4, r1
1734 ; SOFT-NEXT: vadd.i64 q8, q8, q8
1735 ; SOFT-NEXT: vmov r1, r0, d16
1736 ; SOFT-NEXT: vmov r3, r2, d17
1737 ; SOFT-NEXT: bl test_f128_v2i64_helper
1738 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1739 ; SOFT-NEXT: bl __addtf3
1740 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1741 ; SOFT-NEXT: add sp, sp, #16
1742 ; SOFT-NEXT: pop {r4, pc}
1744 ; HARD-LABEL: test_f128_v2i64:
1746 ; HARD-NEXT: .save {r4, lr}
1747 ; HARD-NEXT: push {r4, lr}
1748 ; HARD-NEXT: .pad #16
1749 ; HARD-NEXT: sub sp, sp, #16
1750 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1751 ; HARD-NEXT: mov r4, r1
1752 ; HARD-NEXT: vadd.i64 q0, q8, q8
1753 ; HARD-NEXT: bl test_f128_v2i64_helper
1754 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1755 ; HARD-NEXT: bl __addtf3
1756 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1757 ; HARD-NEXT: add sp, sp, #16
1758 ; HARD-NEXT: pop {r4, pc}
1759 %1 = load <2 x i64>, ptr %p
1760 %2 = add <2 x i64> %1, %1
1761 %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
1762 %4 = fadd fp128 %3, %3
1763 store fp128 %4, ptr %q
; test_f128_v4f32 loads a <4 x float> from %p, adds it to itself, passes it to
; the external test_f128_v4f32_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the vadd.f32 on the argument is bracketed by vrev64.32,
; and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1767 declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
1768 define void @test_f128_v4f32(ptr %p, ptr %q) {
1769 ; SOFT-LABEL: test_f128_v4f32:
1771 ; SOFT-NEXT: .save {r4, lr}
1772 ; SOFT-NEXT: push {r4, lr}
1773 ; SOFT-NEXT: .pad #16
1774 ; SOFT-NEXT: sub sp, sp, #16
1775 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1776 ; SOFT-NEXT: mov r4, r1
1777 ; SOFT-NEXT: vrev64.32 q8, q8
1778 ; SOFT-NEXT: vadd.f32 q8, q8, q8
1779 ; SOFT-NEXT: vrev64.32 q8, q8
1780 ; SOFT-NEXT: vmov r1, r0, d16
1781 ; SOFT-NEXT: vmov r3, r2, d17
1782 ; SOFT-NEXT: bl test_f128_v4f32_helper
1783 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1784 ; SOFT-NEXT: bl __addtf3
1785 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1786 ; SOFT-NEXT: add sp, sp, #16
1787 ; SOFT-NEXT: pop {r4, pc}
1789 ; HARD-LABEL: test_f128_v4f32:
1791 ; HARD-NEXT: .save {r4, lr}
1792 ; HARD-NEXT: push {r4, lr}
1793 ; HARD-NEXT: .pad #16
1794 ; HARD-NEXT: sub sp, sp, #16
1795 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1796 ; HARD-NEXT: mov r4, r1
1797 ; HARD-NEXT: vrev64.32 q8, q8
1798 ; HARD-NEXT: vadd.f32 q8, q8, q8
1799 ; HARD-NEXT: vrev64.32 q0, q8
1800 ; HARD-NEXT: bl test_f128_v4f32_helper
1801 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1802 ; HARD-NEXT: bl __addtf3
1803 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1804 ; HARD-NEXT: add sp, sp, #16
1805 ; HARD-NEXT: pop {r4, pc}
1806 %1 = load <4 x float>, ptr %p
1807 %2 = fadd <4 x float> %1, %1
1808 %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
1809 %4 = fadd fp128 %3, %3
1810 store fp128 %4, ptr %q
; test_f128_v4i32 loads a <4 x i32> from %p, adds it to itself, passes it to
; the external test_f128_v4i32_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the vadd.i32 on the argument is bracketed by vrev64.32,
; and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1814 declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
1815 define void @test_f128_v4i32(ptr %p, ptr %q) {
1816 ; SOFT-LABEL: test_f128_v4i32:
1818 ; SOFT-NEXT: .save {r4, lr}
1819 ; SOFT-NEXT: push {r4, lr}
1820 ; SOFT-NEXT: .pad #16
1821 ; SOFT-NEXT: sub sp, sp, #16
1822 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1823 ; SOFT-NEXT: mov r4, r1
1824 ; SOFT-NEXT: vrev64.32 q8, q8
1825 ; SOFT-NEXT: vadd.i32 q8, q8, q8
1826 ; SOFT-NEXT: vrev64.32 q8, q8
1827 ; SOFT-NEXT: vmov r1, r0, d16
1828 ; SOFT-NEXT: vmov r3, r2, d17
1829 ; SOFT-NEXT: bl test_f128_v4i32_helper
1830 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1831 ; SOFT-NEXT: bl __addtf3
1832 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1833 ; SOFT-NEXT: add sp, sp, #16
1834 ; SOFT-NEXT: pop {r4, pc}
1836 ; HARD-LABEL: test_f128_v4i32:
1838 ; HARD-NEXT: .save {r4, lr}
1839 ; HARD-NEXT: push {r4, lr}
1840 ; HARD-NEXT: .pad #16
1841 ; HARD-NEXT: sub sp, sp, #16
1842 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1843 ; HARD-NEXT: mov r4, r1
1844 ; HARD-NEXT: vrev64.32 q8, q8
1845 ; HARD-NEXT: vadd.i32 q8, q8, q8
1846 ; HARD-NEXT: vrev64.32 q0, q8
1847 ; HARD-NEXT: bl test_f128_v4i32_helper
1848 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1849 ; HARD-NEXT: bl __addtf3
1850 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1851 ; HARD-NEXT: add sp, sp, #16
1852 ; HARD-NEXT: pop {r4, pc}
1853 %1 = load <4 x i32>, ptr %p
1854 %2 = add <4 x i32> %1, %1
1855 %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
1856 %4 = fadd fp128 %3, %3
1857 store fp128 %4, ptr %q
; test_f128_v8i16 loads an <8 x i16> from %p, adds it to itself, passes it to
; the external test_f128_v8i16_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the vadd.i16 on the argument is bracketed by vrev64.16,
; and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1861 declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
1862 define void @test_f128_v8i16(ptr %p, ptr %q) {
1863 ; SOFT-LABEL: test_f128_v8i16:
1865 ; SOFT-NEXT: .save {r4, lr}
1866 ; SOFT-NEXT: push {r4, lr}
1867 ; SOFT-NEXT: .pad #16
1868 ; SOFT-NEXT: sub sp, sp, #16
1869 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1870 ; SOFT-NEXT: mov r4, r1
1871 ; SOFT-NEXT: vrev64.16 q8, q8
1872 ; SOFT-NEXT: vadd.i16 q8, q8, q8
1873 ; SOFT-NEXT: vrev64.16 q8, q8
1874 ; SOFT-NEXT: vmov r1, r0, d16
1875 ; SOFT-NEXT: vmov r3, r2, d17
1876 ; SOFT-NEXT: bl test_f128_v8i16_helper
1877 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1878 ; SOFT-NEXT: bl __addtf3
1879 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1880 ; SOFT-NEXT: add sp, sp, #16
1881 ; SOFT-NEXT: pop {r4, pc}
1883 ; HARD-LABEL: test_f128_v8i16:
1885 ; HARD-NEXT: .save {r4, lr}
1886 ; HARD-NEXT: push {r4, lr}
1887 ; HARD-NEXT: .pad #16
1888 ; HARD-NEXT: sub sp, sp, #16
1889 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1890 ; HARD-NEXT: mov r4, r1
1891 ; HARD-NEXT: vrev64.16 q8, q8
1892 ; HARD-NEXT: vadd.i16 q8, q8, q8
1893 ; HARD-NEXT: vrev64.16 q0, q8
1894 ; HARD-NEXT: bl test_f128_v8i16_helper
1895 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1896 ; HARD-NEXT: bl __addtf3
1897 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1898 ; HARD-NEXT: add sp, sp, #16
1899 ; HARD-NEXT: pop {r4, pc}
1900 %1 = load <8 x i16>, ptr %p
1901 %2 = add <8 x i16> %1, %1
1902 %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
1903 %4 = fadd fp128 %3, %3
1904 store fp128 %4, ptr %q
; test_f128_v16i8 loads a <16 x i8> from %p, adds it to itself, passes it to
; the external test_f128_v16i8_helper, adds the returned fp128 to itself and
; stores it through %q.
; In the expected code the vadd.i8 on the argument is bracketed by vrev64.8,
; and the fp128 add is a call to __addtf3.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1908 declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
1909 define void @test_f128_v16i8(ptr %p, ptr %q) {
1910 ; SOFT-LABEL: test_f128_v16i8:
1912 ; SOFT-NEXT: .save {r4, lr}
1913 ; SOFT-NEXT: push {r4, lr}
1914 ; SOFT-NEXT: .pad #16
1915 ; SOFT-NEXT: sub sp, sp, #16
1916 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
1917 ; SOFT-NEXT: mov r4, r1
1918 ; SOFT-NEXT: vrev64.8 q8, q8
1919 ; SOFT-NEXT: vadd.i8 q8, q8, q8
1920 ; SOFT-NEXT: vrev64.8 q8, q8
1921 ; SOFT-NEXT: vmov r1, r0, d16
1922 ; SOFT-NEXT: vmov r3, r2, d17
1923 ; SOFT-NEXT: bl test_f128_v16i8_helper
1924 ; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
1925 ; SOFT-NEXT: bl __addtf3
1926 ; SOFT-NEXT: stm r4, {r0, r1, r2, r3}
1927 ; SOFT-NEXT: add sp, sp, #16
1928 ; SOFT-NEXT: pop {r4, pc}
1930 ; HARD-LABEL: test_f128_v16i8:
1932 ; HARD-NEXT: .save {r4, lr}
1933 ; HARD-NEXT: push {r4, lr}
1934 ; HARD-NEXT: .pad #16
1935 ; HARD-NEXT: sub sp, sp, #16
1936 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
1937 ; HARD-NEXT: mov r4, r1
1938 ; HARD-NEXT: vrev64.8 q8, q8
1939 ; HARD-NEXT: vadd.i8 q8, q8, q8
1940 ; HARD-NEXT: vrev64.8 q0, q8
1941 ; HARD-NEXT: bl test_f128_v16i8_helper
1942 ; HARD-NEXT: stm sp, {r0, r1, r2, r3}
1943 ; HARD-NEXT: bl __addtf3
1944 ; HARD-NEXT: stm r4, {r0, r1, r2, r3}
1945 ; HARD-NEXT: add sp, sp, #16
1946 ; HARD-NEXT: pop {r4, pc}
1947 %1 = load <16 x i8>, ptr %p
1948 %2 = add <16 x i8> %1, %1
1949 %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
1950 %4 = fadd fp128 %3, %3
1951 store fp128 %4, ptr %q
; test_v2f64_f128 loads an fp128 from %p, adds it to itself, passes it to the
; external test_v2f64_f128_helper, adds the returned <2 x double> to itself
; and stores it through %q.
; In the expected code the fp128 add is a call to __addtf3 ahead of the helper
; call, and the result add is two vadd.f64 instructions with no lane reversal.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1955 declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
1956 define void @test_v2f64_f128(ptr %p, ptr %q) {
1957 ; SOFT-LABEL: test_v2f64_f128:
1959 ; SOFT-NEXT: .save {r4, r5, r11, lr}
1960 ; SOFT-NEXT: push {r4, r5, r11, lr}
1961 ; SOFT-NEXT: .pad #16
1962 ; SOFT-NEXT: sub sp, sp, #16
1963 ; SOFT-NEXT: ldr r4, [r0]
1964 ; SOFT-NEXT: mov r5, r1
1965 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
1966 ; SOFT-NEXT: mov r0, r4
1967 ; SOFT-NEXT: str r4, [sp]
1968 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
1969 ; SOFT-NEXT: bl __addtf3
1970 ; SOFT-NEXT: bl test_v2f64_f128_helper
1971 ; SOFT-NEXT: vmov d16, r3, r2
1972 ; SOFT-NEXT: vmov d17, r1, r0
1973 ; SOFT-NEXT: vadd.f64 d19, d16, d16
1974 ; SOFT-NEXT: vadd.f64 d18, d17, d17
1975 ; SOFT-NEXT: vst1.64 {d18, d19}, [r5]
1976 ; SOFT-NEXT: add sp, sp, #16
1977 ; SOFT-NEXT: pop {r4, r5, r11, pc}
1979 ; HARD-LABEL: test_v2f64_f128:
1981 ; HARD-NEXT: .save {r4, r5, r11, lr}
1982 ; HARD-NEXT: push {r4, r5, r11, lr}
1983 ; HARD-NEXT: .pad #16
1984 ; HARD-NEXT: sub sp, sp, #16
1985 ; HARD-NEXT: ldr r4, [r0]
1986 ; HARD-NEXT: mov r5, r1
1987 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
1988 ; HARD-NEXT: mov r0, r4
1989 ; HARD-NEXT: str r4, [sp]
1990 ; HARD-NEXT: stmib sp, {r1, r2, r3}
1991 ; HARD-NEXT: bl __addtf3
1992 ; HARD-NEXT: bl test_v2f64_f128_helper
1993 ; HARD-NEXT: vadd.f64 d17, d1, d1
1994 ; HARD-NEXT: vadd.f64 d16, d0, d0
1995 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
1996 ; HARD-NEXT: add sp, sp, #16
1997 ; HARD-NEXT: pop {r4, r5, r11, pc}
1998 %1 = load fp128, ptr %p
1999 %2 = fadd fp128 %1, %1
2000 %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
2001 %4 = fadd <2 x double> %3, %3
2002 store <2 x double> %4, ptr %q
; test_v2f64_v2i64 loads a <2 x i64> from %p, adds it to itself, passes it to
; the external test_v2f64_v2i64_helper, adds the returned <2 x double> to
; itself and stores it through %q.
; In the expected code neither the q-register vadd.i64 on the argument nor the
; two vadd.f64 instructions on the result have a lane reversal around them.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
2007 declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
2008 define void @test_v2f64_v2i64(ptr %p, ptr %q) {
2009 ; SOFT-LABEL: test_v2f64_v2i64:
2011 ; SOFT-NEXT: .save {r4, lr}
2012 ; SOFT-NEXT: push {r4, lr}
2013 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2014 ; SOFT-NEXT: mov r4, r1
2015 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2016 ; SOFT-NEXT: vmov r1, r0, d16
2017 ; SOFT-NEXT: vmov r3, r2, d17
2018 ; SOFT-NEXT: bl test_v2f64_v2i64_helper
2019 ; SOFT-NEXT: vmov d16, r3, r2
2020 ; SOFT-NEXT: vmov d17, r1, r0
2021 ; SOFT-NEXT: vadd.f64 d19, d16, d16
2022 ; SOFT-NEXT: vadd.f64 d18, d17, d17
2023 ; SOFT-NEXT: vst1.64 {d18, d19}, [r4]
2024 ; SOFT-NEXT: pop {r4, pc}
2026 ; HARD-LABEL: test_v2f64_v2i64:
2028 ; HARD-NEXT: .save {r4, lr}
2029 ; HARD-NEXT: push {r4, lr}
2030 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2031 ; HARD-NEXT: mov r4, r1
2032 ; HARD-NEXT: vadd.i64 q0, q8, q8
2033 ; HARD-NEXT: bl test_v2f64_v2i64_helper
2034 ; HARD-NEXT: vadd.f64 d17, d1, d1
2035 ; HARD-NEXT: vadd.f64 d16, d0, d0
2036 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2037 ; HARD-NEXT: pop {r4, pc}
2038 %1 = load <2 x i64>, ptr %p
2039 %2 = add <2 x i64> %1, %1
2040 %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
2041 %4 = fadd <2 x double> %3, %3
2042 store <2 x double> %4, ptr %q
; test_v2f64_v4f32 loads a <4 x float> from %p, adds it to itself, passes it to
; the external test_v2f64_v4f32_helper, adds the returned <2 x double> to
; itself and stores it through %q.
; In the expected code the vadd.f32 on the argument is bracketed by vrev64.32;
; the two vadd.f64 instructions on the result have no lane reversal.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
2046 declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
2047 define void @test_v2f64_v4f32(ptr %p, ptr %q) {
2048 ; SOFT-LABEL: test_v2f64_v4f32:
2050 ; SOFT-NEXT: .save {r4, lr}
2051 ; SOFT-NEXT: push {r4, lr}
2052 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2053 ; SOFT-NEXT: mov r4, r1
2054 ; SOFT-NEXT: vrev64.32 q8, q8
2055 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2056 ; SOFT-NEXT: vrev64.32 q8, q8
2057 ; SOFT-NEXT: vmov r1, r0, d16
2058 ; SOFT-NEXT: vmov r3, r2, d17
2059 ; SOFT-NEXT: bl test_v2f64_v4f32_helper
2060 ; SOFT-NEXT: vmov d16, r3, r2
2061 ; SOFT-NEXT: vmov d17, r1, r0
2062 ; SOFT-NEXT: vadd.f64 d19, d16, d16
2063 ; SOFT-NEXT: vadd.f64 d18, d17, d17
2064 ; SOFT-NEXT: vst1.64 {d18, d19}, [r4]
2065 ; SOFT-NEXT: pop {r4, pc}
2067 ; HARD-LABEL: test_v2f64_v4f32:
2069 ; HARD-NEXT: .save {r4, lr}
2070 ; HARD-NEXT: push {r4, lr}
2071 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2072 ; HARD-NEXT: mov r4, r1
2073 ; HARD-NEXT: vrev64.32 q8, q8
2074 ; HARD-NEXT: vadd.f32 q8, q8, q8
2075 ; HARD-NEXT: vrev64.32 q0, q8
2076 ; HARD-NEXT: bl test_v2f64_v4f32_helper
2077 ; HARD-NEXT: vadd.f64 d17, d1, d1
2078 ; HARD-NEXT: vadd.f64 d16, d0, d0
2079 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2080 ; HARD-NEXT: pop {r4, pc}
2081 %1 = load <4 x float>, ptr %p
2082 %2 = fadd <4 x float> %1, %1
2083 %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
2084 %4 = fadd <2 x double> %3, %3
2085 store <2 x double> %4, ptr %q
; test_v2f64_v4i32: load a <4 x i32> from %p, add it to itself, pass it to
; test_v2f64_v4i32_helper, add the returned <2 x double> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in q0 and the result in d0/d1.
; Both prefixes expect vrev64.32 on each side of the vadd.i32 (argument side only).
2089 declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
2090 define void @test_v2f64_v4i32(ptr %p, ptr %q) {
2091 ; SOFT-LABEL: test_v2f64_v4i32:
2093 ; SOFT-NEXT: .save {r4, lr}
2094 ; SOFT-NEXT: push {r4, lr}
2095 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2096 ; SOFT-NEXT: mov r4, r1
2097 ; SOFT-NEXT: vrev64.32 q8, q8
2098 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2099 ; SOFT-NEXT: vrev64.32 q8, q8
2100 ; SOFT-NEXT: vmov r1, r0, d16
2101 ; SOFT-NEXT: vmov r3, r2, d17
2102 ; SOFT-NEXT: bl test_v2f64_v4i32_helper
2103 ; SOFT-NEXT: vmov d16, r3, r2
2104 ; SOFT-NEXT: vmov d17, r1, r0
2105 ; SOFT-NEXT: vadd.f64 d19, d16, d16
2106 ; SOFT-NEXT: vadd.f64 d18, d17, d17
2107 ; SOFT-NEXT: vst1.64 {d18, d19}, [r4]
2108 ; SOFT-NEXT: pop {r4, pc}
2110 ; HARD-LABEL: test_v2f64_v4i32:
2112 ; HARD-NEXT: .save {r4, lr}
2113 ; HARD-NEXT: push {r4, lr}
2114 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2115 ; HARD-NEXT: mov r4, r1
2116 ; HARD-NEXT: vrev64.32 q8, q8
2117 ; HARD-NEXT: vadd.i32 q8, q8, q8
2118 ; HARD-NEXT: vrev64.32 q0, q8
2119 ; HARD-NEXT: bl test_v2f64_v4i32_helper
2120 ; HARD-NEXT: vadd.f64 d17, d1, d1
2121 ; HARD-NEXT: vadd.f64 d16, d0, d0
2122 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2123 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2124 %1 = load <4 x i32>, ptr %p
2125 %2 = add <4 x i32> %1, %1
2126 %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
2127 %4 = fadd <2 x double> %3, %3
2128 store <2 x double> %4, ptr %q
; test_v2f64_v8i16: load an <8 x i16> from %p, add it to itself, pass it to
; test_v2f64_v8i16_helper, add the returned <2 x double> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in q0 and the result in d0/d1.
; Both prefixes expect vrev64.16 on each side of the vadd.i16 (argument side only).
2132 declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
2133 define void @test_v2f64_v8i16(ptr %p, ptr %q) {
2134 ; SOFT-LABEL: test_v2f64_v8i16:
2136 ; SOFT-NEXT: .save {r4, lr}
2137 ; SOFT-NEXT: push {r4, lr}
2138 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2139 ; SOFT-NEXT: mov r4, r1
2140 ; SOFT-NEXT: vrev64.16 q8, q8
2141 ; SOFT-NEXT: vadd.i16 q8, q8, q8
2142 ; SOFT-NEXT: vrev64.16 q8, q8
2143 ; SOFT-NEXT: vmov r1, r0, d16
2144 ; SOFT-NEXT: vmov r3, r2, d17
2145 ; SOFT-NEXT: bl test_v2f64_v8i16_helper
2146 ; SOFT-NEXT: vmov d16, r3, r2
2147 ; SOFT-NEXT: vmov d17, r1, r0
2148 ; SOFT-NEXT: vadd.f64 d19, d16, d16
2149 ; SOFT-NEXT: vadd.f64 d18, d17, d17
2150 ; SOFT-NEXT: vst1.64 {d18, d19}, [r4]
2151 ; SOFT-NEXT: pop {r4, pc}
2153 ; HARD-LABEL: test_v2f64_v8i16:
2155 ; HARD-NEXT: .save {r4, lr}
2156 ; HARD-NEXT: push {r4, lr}
2157 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2158 ; HARD-NEXT: mov r4, r1
2159 ; HARD-NEXT: vrev64.16 q8, q8
2160 ; HARD-NEXT: vadd.i16 q8, q8, q8
2161 ; HARD-NEXT: vrev64.16 q0, q8
2162 ; HARD-NEXT: bl test_v2f64_v8i16_helper
2163 ; HARD-NEXT: vadd.f64 d17, d1, d1
2164 ; HARD-NEXT: vadd.f64 d16, d0, d0
2165 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2166 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2167 %1 = load <8 x i16>, ptr %p
2168 %2 = add <8 x i16> %1, %1
2169 %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
2170 %4 = fadd <2 x double> %3, %3
2171 store <2 x double> %4, ptr %q
; test_v2f64_v16i8: load a <16 x i8> from %p, add it to itself, pass it to
; test_v2f64_v16i8_helper, add the returned <2 x double> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in q0 and the result in d0/d1.
; Both prefixes expect vrev64.8 on each side of the vadd.i8 (argument side only).
2175 declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
2176 define void @test_v2f64_v16i8(ptr %p, ptr %q) {
2177 ; SOFT-LABEL: test_v2f64_v16i8:
2179 ; SOFT-NEXT: .save {r4, lr}
2180 ; SOFT-NEXT: push {r4, lr}
2181 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2182 ; SOFT-NEXT: mov r4, r1
2183 ; SOFT-NEXT: vrev64.8 q8, q8
2184 ; SOFT-NEXT: vadd.i8 q8, q8, q8
2185 ; SOFT-NEXT: vrev64.8 q8, q8
2186 ; SOFT-NEXT: vmov r1, r0, d16
2187 ; SOFT-NEXT: vmov r3, r2, d17
2188 ; SOFT-NEXT: bl test_v2f64_v16i8_helper
2189 ; SOFT-NEXT: vmov d16, r3, r2
2190 ; SOFT-NEXT: vmov d17, r1, r0
2191 ; SOFT-NEXT: vadd.f64 d19, d16, d16
2192 ; SOFT-NEXT: vadd.f64 d18, d17, d17
2193 ; SOFT-NEXT: vst1.64 {d18, d19}, [r4]
2194 ; SOFT-NEXT: pop {r4, pc}
2196 ; HARD-LABEL: test_v2f64_v16i8:
2198 ; HARD-NEXT: .save {r4, lr}
2199 ; HARD-NEXT: push {r4, lr}
2200 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2201 ; HARD-NEXT: mov r4, r1
2202 ; HARD-NEXT: vrev64.8 q8, q8
2203 ; HARD-NEXT: vadd.i8 q8, q8, q8
2204 ; HARD-NEXT: vrev64.8 q0, q8
2205 ; HARD-NEXT: bl test_v2f64_v16i8_helper
2206 ; HARD-NEXT: vadd.f64 d17, d1, d1
2207 ; HARD-NEXT: vadd.f64 d16, d0, d0
2208 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2209 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2210 %1 = load <16 x i8>, ptr %p
2211 %2 = add <16 x i8> %1, %1
2212 %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
2213 %4 = fadd <2 x double> %3, %3
2214 store <2 x double> %4, ptr %q
; test_v2i64_f128: load an fp128 from %p, add it to itself, pass it to
; test_v2i64_f128_helper, add the returned <2 x i64> to itself, store to %q.
; Both prefixes expect the fp128 add as a call to __addtf3 (operands in r0-r3
; plus a 16-byte stack area), immediately followed by the helper call.
; Soft-float checks rebuild the result from r0-r3; hard-float checks read q0.
2218 declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
2219 define void @test_v2i64_f128(ptr %p, ptr %q) {
2220 ; SOFT-LABEL: test_v2i64_f128:
2222 ; SOFT-NEXT: .save {r4, r5, r11, lr}
2223 ; SOFT-NEXT: push {r4, r5, r11, lr}
2224 ; SOFT-NEXT: .pad #16
2225 ; SOFT-NEXT: sub sp, sp, #16
2226 ; SOFT-NEXT: ldr r4, [r0]
2227 ; SOFT-NEXT: mov r5, r1
2228 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
2229 ; SOFT-NEXT: mov r0, r4
2230 ; SOFT-NEXT: str r4, [sp]
2231 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
2232 ; SOFT-NEXT: bl __addtf3
2233 ; SOFT-NEXT: bl test_v2i64_f128_helper
2234 ; SOFT-NEXT: vmov d17, r3, r2
2235 ; SOFT-NEXT: vmov d16, r1, r0
2236 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2237 ; SOFT-NEXT: vst1.64 {d16, d17}, [r5]
2238 ; SOFT-NEXT: add sp, sp, #16
2239 ; SOFT-NEXT: pop {r4, r5, r11, pc}
2241 ; HARD-LABEL: test_v2i64_f128:
2243 ; HARD-NEXT: .save {r4, r5, r11, lr}
2244 ; HARD-NEXT: push {r4, r5, r11, lr}
2245 ; HARD-NEXT: .pad #16
2246 ; HARD-NEXT: sub sp, sp, #16
2247 ; HARD-NEXT: ldr r4, [r0]
2248 ; HARD-NEXT: mov r5, r1
2249 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
2250 ; HARD-NEXT: mov r0, r4
2251 ; HARD-NEXT: str r4, [sp]
2252 ; HARD-NEXT: stmib sp, {r1, r2, r3}
2253 ; HARD-NEXT: bl __addtf3
2254 ; HARD-NEXT: bl test_v2i64_f128_helper
2255 ; HARD-NEXT: vadd.i64 q8, q0, q0
2256 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
2257 ; HARD-NEXT: add sp, sp, #16
2258 ; HARD-NEXT: pop {r4, r5, r11, pc}
; IR under test: load, self-add, helper call, self-add, store.
2259 %1 = load fp128, ptr %p
2260 %2 = fadd fp128 %1, %1
2261 %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
2262 %4 = add <2 x i64> %3, %3
2263 store <2 x i64> %4, ptr %q
; test_v2i64_v2f64: load a <2 x double> from %p, add it to itself, pass it to
; test_v2i64_v2f64_helper, add the returned <2 x i64> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in d0/d1 and the result in q0.
; No vrev64 is expected on either side of the call.
2267 declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
2268 define void @test_v2i64_v2f64(ptr %p, ptr %q) {
2269 ; SOFT-LABEL: test_v2i64_v2f64:
2271 ; SOFT-NEXT: .save {r4, lr}
2272 ; SOFT-NEXT: push {r4, lr}
2273 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2274 ; SOFT-NEXT: mov r4, r1
2275 ; SOFT-NEXT: vadd.f64 d18, d16, d16
2276 ; SOFT-NEXT: vadd.f64 d16, d17, d17
2277 ; SOFT-NEXT: vmov r1, r0, d18
2278 ; SOFT-NEXT: vmov r3, r2, d16
2279 ; SOFT-NEXT: bl test_v2i64_v2f64_helper
2280 ; SOFT-NEXT: vmov d17, r3, r2
2281 ; SOFT-NEXT: vmov d16, r1, r0
2282 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2283 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2284 ; SOFT-NEXT: pop {r4, pc}
2286 ; HARD-LABEL: test_v2i64_v2f64:
2288 ; HARD-NEXT: .save {r4, lr}
2289 ; HARD-NEXT: push {r4, lr}
2290 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2291 ; HARD-NEXT: mov r4, r1
2292 ; HARD-NEXT: vadd.f64 d1, d17, d17
2293 ; HARD-NEXT: vadd.f64 d0, d16, d16
2294 ; HARD-NEXT: bl test_v2i64_v2f64_helper
2295 ; HARD-NEXT: vadd.i64 q8, q0, q0
2296 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2297 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2298 %1 = load <2 x double>, ptr %p
2299 %2 = fadd <2 x double> %1, %1
2300 %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
2301 %4 = add <2 x i64> %3, %3
2302 store <2 x i64> %4, ptr %q
; test_v2i64_v4f32: load a <4 x float> from %p, add it to itself, pass it to
; test_v2i64_v4f32_helper, add the returned <2 x i64> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the vadd.f32 (argument side only).
2306 declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
2307 define void @test_v2i64_v4f32(ptr %p, ptr %q) {
2308 ; SOFT-LABEL: test_v2i64_v4f32:
2310 ; SOFT-NEXT: .save {r4, lr}
2311 ; SOFT-NEXT: push {r4, lr}
2312 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2313 ; SOFT-NEXT: mov r4, r1
2314 ; SOFT-NEXT: vrev64.32 q8, q8
2315 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2316 ; SOFT-NEXT: vrev64.32 q8, q8
2317 ; SOFT-NEXT: vmov r1, r0, d16
2318 ; SOFT-NEXT: vmov r3, r2, d17
2319 ; SOFT-NEXT: bl test_v2i64_v4f32_helper
2320 ; SOFT-NEXT: vmov d17, r3, r2
2321 ; SOFT-NEXT: vmov d16, r1, r0
2322 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2323 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2324 ; SOFT-NEXT: pop {r4, pc}
2326 ; HARD-LABEL: test_v2i64_v4f32:
2328 ; HARD-NEXT: .save {r4, lr}
2329 ; HARD-NEXT: push {r4, lr}
2330 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2331 ; HARD-NEXT: mov r4, r1
2332 ; HARD-NEXT: vrev64.32 q8, q8
2333 ; HARD-NEXT: vadd.f32 q8, q8, q8
2334 ; HARD-NEXT: vrev64.32 q0, q8
2335 ; HARD-NEXT: bl test_v2i64_v4f32_helper
2336 ; HARD-NEXT: vadd.i64 q8, q0, q0
2337 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2338 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2339 %1 = load <4 x float>, ptr %p
2340 %2 = fadd <4 x float> %1, %1
2341 %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
2342 %4 = add <2 x i64> %3, %3
2343 store <2 x i64> %4, ptr %q
; test_v2i64_v4i32: load a <4 x i32> from %p, add it to itself, pass it to
; test_v2i64_v4i32_helper, add the returned <2 x i64> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the vadd.i32 (argument side only).
2347 declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
2348 define void @test_v2i64_v4i32(ptr %p, ptr %q) {
2349 ; SOFT-LABEL: test_v2i64_v4i32:
2351 ; SOFT-NEXT: .save {r4, lr}
2352 ; SOFT-NEXT: push {r4, lr}
2353 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2354 ; SOFT-NEXT: mov r4, r1
2355 ; SOFT-NEXT: vrev64.32 q8, q8
2356 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2357 ; SOFT-NEXT: vrev64.32 q8, q8
2358 ; SOFT-NEXT: vmov r1, r0, d16
2359 ; SOFT-NEXT: vmov r3, r2, d17
2360 ; SOFT-NEXT: bl test_v2i64_v4i32_helper
2361 ; SOFT-NEXT: vmov d17, r3, r2
2362 ; SOFT-NEXT: vmov d16, r1, r0
2363 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2364 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2365 ; SOFT-NEXT: pop {r4, pc}
2367 ; HARD-LABEL: test_v2i64_v4i32:
2369 ; HARD-NEXT: .save {r4, lr}
2370 ; HARD-NEXT: push {r4, lr}
2371 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2372 ; HARD-NEXT: mov r4, r1
2373 ; HARD-NEXT: vrev64.32 q8, q8
2374 ; HARD-NEXT: vadd.i32 q8, q8, q8
2375 ; HARD-NEXT: vrev64.32 q0, q8
2376 ; HARD-NEXT: bl test_v2i64_v4i32_helper
2377 ; HARD-NEXT: vadd.i64 q8, q0, q0
2378 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2379 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2380 %1 = load <4 x i32>, ptr %p
2381 %2 = add <4 x i32> %1, %1
2382 %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
2383 %4 = add <2 x i64> %3, %3
2384 store <2 x i64> %4, ptr %q
; test_v2i64_v8i16: load an <8 x i16> from %p, add it to itself, pass it to
; test_v2i64_v8i16_helper, add the returned <2 x i64> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.16 on each side of the vadd.i16 (argument side only).
2388 declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
2389 define void @test_v2i64_v8i16(ptr %p, ptr %q) {
2390 ; SOFT-LABEL: test_v2i64_v8i16:
2392 ; SOFT-NEXT: .save {r4, lr}
2393 ; SOFT-NEXT: push {r4, lr}
2394 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2395 ; SOFT-NEXT: mov r4, r1
2396 ; SOFT-NEXT: vrev64.16 q8, q8
2397 ; SOFT-NEXT: vadd.i16 q8, q8, q8
2398 ; SOFT-NEXT: vrev64.16 q8, q8
2399 ; SOFT-NEXT: vmov r1, r0, d16
2400 ; SOFT-NEXT: vmov r3, r2, d17
2401 ; SOFT-NEXT: bl test_v2i64_v8i16_helper
2402 ; SOFT-NEXT: vmov d17, r3, r2
2403 ; SOFT-NEXT: vmov d16, r1, r0
2404 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2405 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2406 ; SOFT-NEXT: pop {r4, pc}
2408 ; HARD-LABEL: test_v2i64_v8i16:
2410 ; HARD-NEXT: .save {r4, lr}
2411 ; HARD-NEXT: push {r4, lr}
2412 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2413 ; HARD-NEXT: mov r4, r1
2414 ; HARD-NEXT: vrev64.16 q8, q8
2415 ; HARD-NEXT: vadd.i16 q8, q8, q8
2416 ; HARD-NEXT: vrev64.16 q0, q8
2417 ; HARD-NEXT: bl test_v2i64_v8i16_helper
2418 ; HARD-NEXT: vadd.i64 q8, q0, q0
2419 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2420 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2421 %1 = load <8 x i16>, ptr %p
2422 %2 = add <8 x i16> %1, %1
2423 %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
2424 %4 = add <2 x i64> %3, %3
2425 store <2 x i64> %4, ptr %q
; test_v2i64_v16i8: load a <16 x i8> from %p, add it to itself, pass it to
; test_v2i64_v16i8_helper, add the returned <2 x i64> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.8 on each side of the vadd.i8 (argument side only).
2429 declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
2430 define void @test_v2i64_v16i8(ptr %p, ptr %q) {
2431 ; SOFT-LABEL: test_v2i64_v16i8:
2433 ; SOFT-NEXT: .save {r4, lr}
2434 ; SOFT-NEXT: push {r4, lr}
2435 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2436 ; SOFT-NEXT: mov r4, r1
2437 ; SOFT-NEXT: vrev64.8 q8, q8
2438 ; SOFT-NEXT: vadd.i8 q8, q8, q8
2439 ; SOFT-NEXT: vrev64.8 q8, q8
2440 ; SOFT-NEXT: vmov r1, r0, d16
2441 ; SOFT-NEXT: vmov r3, r2, d17
2442 ; SOFT-NEXT: bl test_v2i64_v16i8_helper
2443 ; SOFT-NEXT: vmov d17, r3, r2
2444 ; SOFT-NEXT: vmov d16, r1, r0
2445 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2446 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2447 ; SOFT-NEXT: pop {r4, pc}
2449 ; HARD-LABEL: test_v2i64_v16i8:
2451 ; HARD-NEXT: .save {r4, lr}
2452 ; HARD-NEXT: push {r4, lr}
2453 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2454 ; HARD-NEXT: mov r4, r1
2455 ; HARD-NEXT: vrev64.8 q8, q8
2456 ; HARD-NEXT: vadd.i8 q8, q8, q8
2457 ; HARD-NEXT: vrev64.8 q0, q8
2458 ; HARD-NEXT: bl test_v2i64_v16i8_helper
2459 ; HARD-NEXT: vadd.i64 q8, q0, q0
2460 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2461 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2462 %1 = load <16 x i8>, ptr %p
2463 %2 = add <16 x i8> %1, %1
2464 %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
2465 %4 = add <2 x i64> %3, %3
2466 store <2 x i64> %4, ptr %q
; test_v4f32_f128: load an fp128 from %p, add it to itself, pass it to
; test_v4f32_f128_helper, add the returned <4 x float> to itself, store to %q.
; Both prefixes expect the fp128 add as a call to __addtf3 (operands in r0-r3
; plus a 16-byte stack area), immediately followed by the helper call.
; Soft-float checks rebuild the result from r0-r3; hard-float checks read q0.
; Both expect vrev64.32 on each side of the result-side vadd.f32.
2470 declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
2471 define void @test_v4f32_f128(ptr %p, ptr %q) {
2472 ; SOFT-LABEL: test_v4f32_f128:
2474 ; SOFT-NEXT: .save {r4, r5, r11, lr}
2475 ; SOFT-NEXT: push {r4, r5, r11, lr}
2476 ; SOFT-NEXT: .pad #16
2477 ; SOFT-NEXT: sub sp, sp, #16
2478 ; SOFT-NEXT: ldr r4, [r0]
2479 ; SOFT-NEXT: mov r5, r1
2480 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
2481 ; SOFT-NEXT: mov r0, r4
2482 ; SOFT-NEXT: str r4, [sp]
2483 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
2484 ; SOFT-NEXT: bl __addtf3
2485 ; SOFT-NEXT: bl test_v4f32_f128_helper
2486 ; SOFT-NEXT: vmov d17, r3, r2
2487 ; SOFT-NEXT: vmov d16, r1, r0
2488 ; SOFT-NEXT: vrev64.32 q8, q8
2489 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2490 ; SOFT-NEXT: vrev64.32 q8, q8
2491 ; SOFT-NEXT: vst1.64 {d16, d17}, [r5]
2492 ; SOFT-NEXT: add sp, sp, #16
2493 ; SOFT-NEXT: pop {r4, r5, r11, pc}
2495 ; HARD-LABEL: test_v4f32_f128:
2497 ; HARD-NEXT: .save {r4, r5, r11, lr}
2498 ; HARD-NEXT: push {r4, r5, r11, lr}
2499 ; HARD-NEXT: .pad #16
2500 ; HARD-NEXT: sub sp, sp, #16
2501 ; HARD-NEXT: ldr r4, [r0]
2502 ; HARD-NEXT: mov r5, r1
2503 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
2504 ; HARD-NEXT: mov r0, r4
2505 ; HARD-NEXT: str r4, [sp]
2506 ; HARD-NEXT: stmib sp, {r1, r2, r3}
2507 ; HARD-NEXT: bl __addtf3
2508 ; HARD-NEXT: bl test_v4f32_f128_helper
2509 ; HARD-NEXT: vrev64.32 q8, q0
2510 ; HARD-NEXT: vadd.f32 q8, q8, q8
2511 ; HARD-NEXT: vrev64.32 q8, q8
2512 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
2513 ; HARD-NEXT: add sp, sp, #16
2514 ; HARD-NEXT: pop {r4, r5, r11, pc}
; IR under test: load, self-add, helper call, self-add, store.
2515 %1 = load fp128, ptr %p
2516 %2 = fadd fp128 %1, %1
2517 %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
2518 %4 = fadd <4 x float> %3, %3
2519 store <4 x float> %4, ptr %q
; test_v4f32_v2f64: load a <2 x double> from %p, add it to itself, pass it to
; test_v4f32_v2f64_helper, add the returned <4 x float> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in d0/d1 and the result in q0.
; Both prefixes expect vrev64.32 on each side of the result-side vadd.f32.
2523 declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
2524 define void @test_v4f32_v2f64(ptr %p, ptr %q) {
2525 ; SOFT-LABEL: test_v4f32_v2f64:
2527 ; SOFT-NEXT: .save {r4, lr}
2528 ; SOFT-NEXT: push {r4, lr}
2529 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2530 ; SOFT-NEXT: mov r4, r1
2531 ; SOFT-NEXT: vadd.f64 d18, d16, d16
2532 ; SOFT-NEXT: vadd.f64 d16, d17, d17
2533 ; SOFT-NEXT: vmov r1, r0, d18
2534 ; SOFT-NEXT: vmov r3, r2, d16
2535 ; SOFT-NEXT: bl test_v4f32_v2f64_helper
2536 ; SOFT-NEXT: vmov d17, r3, r2
2537 ; SOFT-NEXT: vmov d16, r1, r0
2538 ; SOFT-NEXT: vrev64.32 q8, q8
2539 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2540 ; SOFT-NEXT: vrev64.32 q8, q8
2541 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2542 ; SOFT-NEXT: pop {r4, pc}
2544 ; HARD-LABEL: test_v4f32_v2f64:
2546 ; HARD-NEXT: .save {r4, lr}
2547 ; HARD-NEXT: push {r4, lr}
2548 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2549 ; HARD-NEXT: mov r4, r1
2550 ; HARD-NEXT: vadd.f64 d1, d17, d17
2551 ; HARD-NEXT: vadd.f64 d0, d16, d16
2552 ; HARD-NEXT: bl test_v4f32_v2f64_helper
2553 ; HARD-NEXT: vrev64.32 q8, q0
2554 ; HARD-NEXT: vadd.f32 q8, q8, q8
2555 ; HARD-NEXT: vrev64.32 q8, q8
2556 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2557 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2558 %1 = load <2 x double>, ptr %p
2559 %2 = fadd <2 x double> %1, %1
2560 %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
2561 %4 = fadd <4 x float> %3, %3
2562 store <4 x float> %4, ptr %q
; test_v4f32_v2i64: load a <2 x i64> from %p, add it to itself, pass it to
; test_v4f32_v2i64_helper, add the returned <4 x float> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the result-side vadd.f32.
2566 declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
2567 define void @test_v4f32_v2i64(ptr %p, ptr %q) {
2568 ; SOFT-LABEL: test_v4f32_v2i64:
2570 ; SOFT-NEXT: .save {r4, lr}
2571 ; SOFT-NEXT: push {r4, lr}
2572 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2573 ; SOFT-NEXT: mov r4, r1
2574 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2575 ; SOFT-NEXT: vmov r1, r0, d16
2576 ; SOFT-NEXT: vmov r3, r2, d17
2577 ; SOFT-NEXT: bl test_v4f32_v2i64_helper
2578 ; SOFT-NEXT: vmov d17, r3, r2
2579 ; SOFT-NEXT: vmov d16, r1, r0
2580 ; SOFT-NEXT: vrev64.32 q8, q8
2581 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2582 ; SOFT-NEXT: vrev64.32 q8, q8
2583 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2584 ; SOFT-NEXT: pop {r4, pc}
2586 ; HARD-LABEL: test_v4f32_v2i64:
2588 ; HARD-NEXT: .save {r4, lr}
2589 ; HARD-NEXT: push {r4, lr}
2590 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2591 ; HARD-NEXT: mov r4, r1
2592 ; HARD-NEXT: vadd.i64 q0, q8, q8
2593 ; HARD-NEXT: bl test_v4f32_v2i64_helper
2594 ; HARD-NEXT: vrev64.32 q8, q0
2595 ; HARD-NEXT: vadd.f32 q8, q8, q8
2596 ; HARD-NEXT: vrev64.32 q8, q8
2597 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2598 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2599 %1 = load <2 x i64>, ptr %p
2600 %2 = add <2 x i64> %1, %1
2601 %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
2602 %4 = fadd <4 x float> %3, %3
2603 store <4 x float> %4, ptr %q
; test_v4f32_v4i32: load a <4 x i32> from %p, add it to itself, pass it to
; test_v4f32_v4i32_helper, add the returned <4 x float> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the vadd.i32 and of the vadd.f32.
2607 declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
2608 define void @test_v4f32_v4i32(ptr %p, ptr %q) {
2609 ; SOFT-LABEL: test_v4f32_v4i32:
2611 ; SOFT-NEXT: .save {r4, lr}
2612 ; SOFT-NEXT: push {r4, lr}
2613 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2614 ; SOFT-NEXT: mov r4, r1
2615 ; SOFT-NEXT: vrev64.32 q8, q8
2616 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2617 ; SOFT-NEXT: vrev64.32 q8, q8
2618 ; SOFT-NEXT: vmov r1, r0, d16
2619 ; SOFT-NEXT: vmov r3, r2, d17
2620 ; SOFT-NEXT: bl test_v4f32_v4i32_helper
2621 ; SOFT-NEXT: vmov d17, r3, r2
2622 ; SOFT-NEXT: vmov d16, r1, r0
2623 ; SOFT-NEXT: vrev64.32 q8, q8
2624 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2625 ; SOFT-NEXT: vrev64.32 q8, q8
2626 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2627 ; SOFT-NEXT: pop {r4, pc}
2629 ; HARD-LABEL: test_v4f32_v4i32:
2631 ; HARD-NEXT: .save {r4, lr}
2632 ; HARD-NEXT: push {r4, lr}
2633 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2634 ; HARD-NEXT: mov r4, r1
2635 ; HARD-NEXT: vrev64.32 q8, q8
2636 ; HARD-NEXT: vadd.i32 q8, q8, q8
2637 ; HARD-NEXT: vrev64.32 q0, q8
2638 ; HARD-NEXT: bl test_v4f32_v4i32_helper
2639 ; HARD-NEXT: vrev64.32 q8, q0
2640 ; HARD-NEXT: vadd.f32 q8, q8, q8
2641 ; HARD-NEXT: vrev64.32 q8, q8
2642 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2643 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2644 %1 = load <4 x i32>, ptr %p
2645 %2 = add <4 x i32> %1, %1
2646 %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
2647 %4 = fadd <4 x float> %3, %3
2648 store <4 x float> %4, ptr %q
; test_v4f32_v8i16: load an <8 x i16> from %p, add it to itself, pass it to
; test_v4f32_v8i16_helper, add the returned <4 x float> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.16 around the vadd.i16 and vrev64.32 around the vadd.f32.
2652 declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
2653 define void @test_v4f32_v8i16(ptr %p, ptr %q) {
2654 ; SOFT-LABEL: test_v4f32_v8i16:
2656 ; SOFT-NEXT: .save {r4, lr}
2657 ; SOFT-NEXT: push {r4, lr}
2658 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2659 ; SOFT-NEXT: mov r4, r1
2660 ; SOFT-NEXT: vrev64.16 q8, q8
2661 ; SOFT-NEXT: vadd.i16 q8, q8, q8
2662 ; SOFT-NEXT: vrev64.16 q8, q8
2663 ; SOFT-NEXT: vmov r1, r0, d16
2664 ; SOFT-NEXT: vmov r3, r2, d17
2665 ; SOFT-NEXT: bl test_v4f32_v8i16_helper
2666 ; SOFT-NEXT: vmov d17, r3, r2
2667 ; SOFT-NEXT: vmov d16, r1, r0
2668 ; SOFT-NEXT: vrev64.32 q8, q8
2669 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2670 ; SOFT-NEXT: vrev64.32 q8, q8
2671 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2672 ; SOFT-NEXT: pop {r4, pc}
2674 ; HARD-LABEL: test_v4f32_v8i16:
2676 ; HARD-NEXT: .save {r4, lr}
2677 ; HARD-NEXT: push {r4, lr}
2678 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2679 ; HARD-NEXT: mov r4, r1
2680 ; HARD-NEXT: vrev64.16 q8, q8
2681 ; HARD-NEXT: vadd.i16 q8, q8, q8
2682 ; HARD-NEXT: vrev64.16 q0, q8
2683 ; HARD-NEXT: bl test_v4f32_v8i16_helper
2684 ; HARD-NEXT: vrev64.32 q8, q0
2685 ; HARD-NEXT: vadd.f32 q8, q8, q8
2686 ; HARD-NEXT: vrev64.32 q8, q8
2687 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2688 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2689 %1 = load <8 x i16>, ptr %p
2690 %2 = add <8 x i16> %1, %1
2691 %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
2692 %4 = fadd <4 x float> %3, %3
2693 store <4 x float> %4, ptr %q
; test_v4f32_v16i8: load a <16 x i8> from %p, add it to itself, pass it to
; test_v4f32_v16i8_helper, add the returned <4 x float> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.8 around the vadd.i8 and vrev64.32 around the vadd.f32.
2697 declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
2698 define void @test_v4f32_v16i8(ptr %p, ptr %q) {
2699 ; SOFT-LABEL: test_v4f32_v16i8:
2701 ; SOFT-NEXT: .save {r4, lr}
2702 ; SOFT-NEXT: push {r4, lr}
2703 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2704 ; SOFT-NEXT: mov r4, r1
2705 ; SOFT-NEXT: vrev64.8 q8, q8
2706 ; SOFT-NEXT: vadd.i8 q8, q8, q8
2707 ; SOFT-NEXT: vrev64.8 q8, q8
2708 ; SOFT-NEXT: vmov r1, r0, d16
2709 ; SOFT-NEXT: vmov r3, r2, d17
2710 ; SOFT-NEXT: bl test_v4f32_v16i8_helper
2711 ; SOFT-NEXT: vmov d17, r3, r2
2712 ; SOFT-NEXT: vmov d16, r1, r0
2713 ; SOFT-NEXT: vrev64.32 q8, q8
2714 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2715 ; SOFT-NEXT: vrev64.32 q8, q8
2716 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2717 ; SOFT-NEXT: pop {r4, pc}
2719 ; HARD-LABEL: test_v4f32_v16i8:
2721 ; HARD-NEXT: .save {r4, lr}
2722 ; HARD-NEXT: push {r4, lr}
2723 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2724 ; HARD-NEXT: mov r4, r1
2725 ; HARD-NEXT: vrev64.8 q8, q8
2726 ; HARD-NEXT: vadd.i8 q8, q8, q8
2727 ; HARD-NEXT: vrev64.8 q0, q8
2728 ; HARD-NEXT: bl test_v4f32_v16i8_helper
2729 ; HARD-NEXT: vrev64.32 q8, q0
2730 ; HARD-NEXT: vadd.f32 q8, q8, q8
2731 ; HARD-NEXT: vrev64.32 q8, q8
2732 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2733 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2734 %1 = load <16 x i8>, ptr %p
2735 %2 = add <16 x i8> %1, %1
2736 %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
2737 %4 = fadd <4 x float> %3, %3
2738 store <4 x float> %4, ptr %q
; test_v4i32_f128: load an fp128 from %p, add it to itself, pass it to
; test_v4i32_f128_helper, add the returned <4 x i32> to itself, store to %q.
; Both prefixes expect the fp128 add as a call to __addtf3 (operands in r0-r3
; plus a 16-byte stack area), immediately followed by the helper call.
; Soft-float checks rebuild the result from r0-r3; hard-float checks read q0.
; Both expect vrev64.32 on each side of the result-side vadd.i32.
2742 declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
2743 define void @test_v4i32_f128(ptr %p, ptr %q) {
2744 ; SOFT-LABEL: test_v4i32_f128:
2746 ; SOFT-NEXT: .save {r4, r5, r11, lr}
2747 ; SOFT-NEXT: push {r4, r5, r11, lr}
2748 ; SOFT-NEXT: .pad #16
2749 ; SOFT-NEXT: sub sp, sp, #16
2750 ; SOFT-NEXT: ldr r4, [r0]
2751 ; SOFT-NEXT: mov r5, r1
2752 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
2753 ; SOFT-NEXT: mov r0, r4
2754 ; SOFT-NEXT: str r4, [sp]
2755 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
2756 ; SOFT-NEXT: bl __addtf3
2757 ; SOFT-NEXT: bl test_v4i32_f128_helper
2758 ; SOFT-NEXT: vmov d17, r3, r2
2759 ; SOFT-NEXT: vmov d16, r1, r0
2760 ; SOFT-NEXT: vrev64.32 q8, q8
2761 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2762 ; SOFT-NEXT: vrev64.32 q8, q8
2763 ; SOFT-NEXT: vst1.64 {d16, d17}, [r5]
2764 ; SOFT-NEXT: add sp, sp, #16
2765 ; SOFT-NEXT: pop {r4, r5, r11, pc}
2767 ; HARD-LABEL: test_v4i32_f128:
2769 ; HARD-NEXT: .save {r4, r5, r11, lr}
2770 ; HARD-NEXT: push {r4, r5, r11, lr}
2771 ; HARD-NEXT: .pad #16
2772 ; HARD-NEXT: sub sp, sp, #16
2773 ; HARD-NEXT: ldr r4, [r0]
2774 ; HARD-NEXT: mov r5, r1
2775 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
2776 ; HARD-NEXT: mov r0, r4
2777 ; HARD-NEXT: str r4, [sp]
2778 ; HARD-NEXT: stmib sp, {r1, r2, r3}
2779 ; HARD-NEXT: bl __addtf3
2780 ; HARD-NEXT: bl test_v4i32_f128_helper
2781 ; HARD-NEXT: vrev64.32 q8, q0
2782 ; HARD-NEXT: vadd.i32 q8, q8, q8
2783 ; HARD-NEXT: vrev64.32 q8, q8
2784 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
2785 ; HARD-NEXT: add sp, sp, #16
2786 ; HARD-NEXT: pop {r4, r5, r11, pc}
; IR under test: load, self-add, helper call, self-add, store.
2787 %1 = load fp128, ptr %p
2788 %2 = fadd fp128 %1, %1
2789 %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
2790 %4 = add <4 x i32> %3, %3
2791 store <4 x i32> %4, ptr %q
; test_v4i32_v2f64: load a <2 x double> from %p, add it to itself, pass it to
; test_v4i32_v2f64_helper, add the returned <4 x i32> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect the argument in d0/d1 and the result in q0.
; Both prefixes expect vrev64.32 on each side of the result-side vadd.i32.
2795 declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
2796 define void @test_v4i32_v2f64(ptr %p, ptr %q) {
2797 ; SOFT-LABEL: test_v4i32_v2f64:
2799 ; SOFT-NEXT: .save {r4, lr}
2800 ; SOFT-NEXT: push {r4, lr}
2801 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2802 ; SOFT-NEXT: mov r4, r1
2803 ; SOFT-NEXT: vadd.f64 d18, d16, d16
2804 ; SOFT-NEXT: vadd.f64 d16, d17, d17
2805 ; SOFT-NEXT: vmov r1, r0, d18
2806 ; SOFT-NEXT: vmov r3, r2, d16
2807 ; SOFT-NEXT: bl test_v4i32_v2f64_helper
2808 ; SOFT-NEXT: vmov d17, r3, r2
2809 ; SOFT-NEXT: vmov d16, r1, r0
2810 ; SOFT-NEXT: vrev64.32 q8, q8
2811 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2812 ; SOFT-NEXT: vrev64.32 q8, q8
2813 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2814 ; SOFT-NEXT: pop {r4, pc}
2816 ; HARD-LABEL: test_v4i32_v2f64:
2818 ; HARD-NEXT: .save {r4, lr}
2819 ; HARD-NEXT: push {r4, lr}
2820 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2821 ; HARD-NEXT: mov r4, r1
2822 ; HARD-NEXT: vadd.f64 d1, d17, d17
2823 ; HARD-NEXT: vadd.f64 d0, d16, d16
2824 ; HARD-NEXT: bl test_v4i32_v2f64_helper
2825 ; HARD-NEXT: vrev64.32 q8, q0
2826 ; HARD-NEXT: vadd.i32 q8, q8, q8
2827 ; HARD-NEXT: vrev64.32 q8, q8
2828 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2829 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2830 %1 = load <2 x double>, ptr %p
2831 %2 = fadd <2 x double> %1, %1
2832 %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
2833 %4 = add <4 x i32> %3, %3
2834 store <4 x i32> %4, ptr %q
; test_v4i32_v2i64: load a <2 x i64> from %p, add it to itself, pass it to
; test_v4i32_v2i64_helper, add the returned <4 x i32> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the result-side vadd.i32.
2838 declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
2839 define void @test_v4i32_v2i64(ptr %p, ptr %q) {
2840 ; SOFT-LABEL: test_v4i32_v2i64:
2842 ; SOFT-NEXT: .save {r4, lr}
2843 ; SOFT-NEXT: push {r4, lr}
2844 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2845 ; SOFT-NEXT: mov r4, r1
2846 ; SOFT-NEXT: vadd.i64 q8, q8, q8
2847 ; SOFT-NEXT: vmov r1, r0, d16
2848 ; SOFT-NEXT: vmov r3, r2, d17
2849 ; SOFT-NEXT: bl test_v4i32_v2i64_helper
2850 ; SOFT-NEXT: vmov d17, r3, r2
2851 ; SOFT-NEXT: vmov d16, r1, r0
2852 ; SOFT-NEXT: vrev64.32 q8, q8
2853 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2854 ; SOFT-NEXT: vrev64.32 q8, q8
2855 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2856 ; SOFT-NEXT: pop {r4, pc}
2858 ; HARD-LABEL: test_v4i32_v2i64:
2860 ; HARD-NEXT: .save {r4, lr}
2861 ; HARD-NEXT: push {r4, lr}
2862 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2863 ; HARD-NEXT: mov r4, r1
2864 ; HARD-NEXT: vadd.i64 q0, q8, q8
2865 ; HARD-NEXT: bl test_v4i32_v2i64_helper
2866 ; HARD-NEXT: vrev64.32 q8, q0
2867 ; HARD-NEXT: vadd.i32 q8, q8, q8
2868 ; HARD-NEXT: vrev64.32 q8, q8
2869 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2870 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2871 %1 = load <2 x i64>, ptr %p
2872 %2 = add <2 x i64> %1, %1
2873 %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
2874 %4 = add <4 x i32> %3, %3
2875 store <4 x i32> %4, ptr %q
; test_v4i32_v4f32: load a <4 x float> from %p, add it to itself, pass it to
; test_v4i32_v4f32_helper, add the returned <4 x i32> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.32 on each side of the vadd.f32 and of the vadd.i32.
2879 declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
2880 define void @test_v4i32_v4f32(ptr %p, ptr %q) {
2881 ; SOFT-LABEL: test_v4i32_v4f32:
2883 ; SOFT-NEXT: .save {r4, lr}
2884 ; SOFT-NEXT: push {r4, lr}
2885 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2886 ; SOFT-NEXT: mov r4, r1
2887 ; SOFT-NEXT: vrev64.32 q8, q8
2888 ; SOFT-NEXT: vadd.f32 q8, q8, q8
2889 ; SOFT-NEXT: vrev64.32 q8, q8
2890 ; SOFT-NEXT: vmov r1, r0, d16
2891 ; SOFT-NEXT: vmov r3, r2, d17
2892 ; SOFT-NEXT: bl test_v4i32_v4f32_helper
2893 ; SOFT-NEXT: vmov d17, r3, r2
2894 ; SOFT-NEXT: vmov d16, r1, r0
2895 ; SOFT-NEXT: vrev64.32 q8, q8
2896 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2897 ; SOFT-NEXT: vrev64.32 q8, q8
2898 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2899 ; SOFT-NEXT: pop {r4, pc}
2901 ; HARD-LABEL: test_v4i32_v4f32:
2903 ; HARD-NEXT: .save {r4, lr}
2904 ; HARD-NEXT: push {r4, lr}
2905 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2906 ; HARD-NEXT: mov r4, r1
2907 ; HARD-NEXT: vrev64.32 q8, q8
2908 ; HARD-NEXT: vadd.f32 q8, q8, q8
2909 ; HARD-NEXT: vrev64.32 q0, q8
2910 ; HARD-NEXT: bl test_v4i32_v4f32_helper
2911 ; HARD-NEXT: vrev64.32 q8, q0
2912 ; HARD-NEXT: vadd.i32 q8, q8, q8
2913 ; HARD-NEXT: vrev64.32 q8, q8
2914 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2915 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2916 %1 = load <4 x float>, ptr %p
2917 %2 = fadd <4 x float> %1, %1
2918 %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
2919 %4 = add <4 x i32> %3, %3
2920 store <4 x i32> %4, ptr %q
; test_v4i32_v8i16: load an <8 x i16> from %p, add it to itself, pass it to
; test_v4i32_v8i16_helper, add the returned <4 x i32> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.16 around the vadd.i16 and vrev64.32 around the vadd.i32.
2924 declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
2925 define void @test_v4i32_v8i16(ptr %p, ptr %q) {
2926 ; SOFT-LABEL: test_v4i32_v8i16:
2928 ; SOFT-NEXT: .save {r4, lr}
2929 ; SOFT-NEXT: push {r4, lr}
2930 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2931 ; SOFT-NEXT: mov r4, r1
2932 ; SOFT-NEXT: vrev64.16 q8, q8
2933 ; SOFT-NEXT: vadd.i16 q8, q8, q8
2934 ; SOFT-NEXT: vrev64.16 q8, q8
2935 ; SOFT-NEXT: vmov r1, r0, d16
2936 ; SOFT-NEXT: vmov r3, r2, d17
2937 ; SOFT-NEXT: bl test_v4i32_v8i16_helper
2938 ; SOFT-NEXT: vmov d17, r3, r2
2939 ; SOFT-NEXT: vmov d16, r1, r0
2940 ; SOFT-NEXT: vrev64.32 q8, q8
2941 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2942 ; SOFT-NEXT: vrev64.32 q8, q8
2943 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2944 ; SOFT-NEXT: pop {r4, pc}
2946 ; HARD-LABEL: test_v4i32_v8i16:
2948 ; HARD-NEXT: .save {r4, lr}
2949 ; HARD-NEXT: push {r4, lr}
2950 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2951 ; HARD-NEXT: mov r4, r1
2952 ; HARD-NEXT: vrev64.16 q8, q8
2953 ; HARD-NEXT: vadd.i16 q8, q8, q8
2954 ; HARD-NEXT: vrev64.16 q0, q8
2955 ; HARD-NEXT: bl test_v4i32_v8i16_helper
2956 ; HARD-NEXT: vrev64.32 q8, q0
2957 ; HARD-NEXT: vadd.i32 q8, q8, q8
2958 ; HARD-NEXT: vrev64.32 q8, q8
2959 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
2960 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
2961 %1 = load <8 x i16>, ptr %p
2962 %2 = add <8 x i16> %1, %1
2963 %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
2964 %4 = add <4 x i32> %3, %3
2965 store <4 x i32> %4, ptr %q
; test_v4i32_v16i8: load a <16 x i8> from %p, add it to itself, pass it to
; test_v4i32_v16i8_helper, add the returned <4 x i32> to itself, store to %q.
; Soft-float checks expect the argument and result to travel in r0-r3;
; hard-float checks expect both the argument and the result in q0.
; Both prefixes expect vrev64.8 around the vadd.i8 and vrev64.32 around the vadd.i32.
2969 declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
2970 define void @test_v4i32_v16i8(ptr %p, ptr %q) {
2971 ; SOFT-LABEL: test_v4i32_v16i8:
2973 ; SOFT-NEXT: .save {r4, lr}
2974 ; SOFT-NEXT: push {r4, lr}
2975 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
2976 ; SOFT-NEXT: mov r4, r1
2977 ; SOFT-NEXT: vrev64.8 q8, q8
2978 ; SOFT-NEXT: vadd.i8 q8, q8, q8
2979 ; SOFT-NEXT: vrev64.8 q8, q8
2980 ; SOFT-NEXT: vmov r1, r0, d16
2981 ; SOFT-NEXT: vmov r3, r2, d17
2982 ; SOFT-NEXT: bl test_v4i32_v16i8_helper
2983 ; SOFT-NEXT: vmov d17, r3, r2
2984 ; SOFT-NEXT: vmov d16, r1, r0
2985 ; SOFT-NEXT: vrev64.32 q8, q8
2986 ; SOFT-NEXT: vadd.i32 q8, q8, q8
2987 ; SOFT-NEXT: vrev64.32 q8, q8
2988 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
2989 ; SOFT-NEXT: pop {r4, pc}
2991 ; HARD-LABEL: test_v4i32_v16i8:
2993 ; HARD-NEXT: .save {r4, lr}
2994 ; HARD-NEXT: push {r4, lr}
2995 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
2996 ; HARD-NEXT: mov r4, r1
2997 ; HARD-NEXT: vrev64.8 q8, q8
2998 ; HARD-NEXT: vadd.i8 q8, q8, q8
2999 ; HARD-NEXT: vrev64.8 q0, q8
3000 ; HARD-NEXT: bl test_v4i32_v16i8_helper
3001 ; HARD-NEXT: vrev64.32 q8, q0
3002 ; HARD-NEXT: vadd.i32 q8, q8, q8
3003 ; HARD-NEXT: vrev64.32 q8, q8
3004 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3005 ; HARD-NEXT: pop {r4, pc}
; IR under test: load, self-add, helper call, self-add, store.
3006 %1 = load <16 x i8>, ptr %p
3007 %2 = add <16 x i8> %1, %1
3008 %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
3009 %4 = add <4 x i32> %3, %3
3010 store <4 x i32> %4, ptr %q
; test_v8i16_f128: load an fp128 from %p, add it to itself, pass it to
; test_v8i16_f128_helper, add the returned <8 x i16> to itself, store to %q.
; Both prefixes expect the fp128 add as a call to __addtf3 (operands in r0-r3
; plus a 16-byte stack area), immediately followed by the helper call.
; Soft-float checks rebuild the result from r0-r3; hard-float checks read q0.
; Both expect vrev64.16 on each side of the result-side vadd.i16.
3014 declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
3015 define void @test_v8i16_f128(ptr %p, ptr %q) {
3016 ; SOFT-LABEL: test_v8i16_f128:
3018 ; SOFT-NEXT: .save {r4, r5, r11, lr}
3019 ; SOFT-NEXT: push {r4, r5, r11, lr}
3020 ; SOFT-NEXT: .pad #16
3021 ; SOFT-NEXT: sub sp, sp, #16
3022 ; SOFT-NEXT: ldr r4, [r0]
3023 ; SOFT-NEXT: mov r5, r1
3024 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
3025 ; SOFT-NEXT: mov r0, r4
3026 ; SOFT-NEXT: str r4, [sp]
3027 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
3028 ; SOFT-NEXT: bl __addtf3
3029 ; SOFT-NEXT: bl test_v8i16_f128_helper
3030 ; SOFT-NEXT: vmov d17, r3, r2
3031 ; SOFT-NEXT: vmov d16, r1, r0
3032 ; SOFT-NEXT: vrev64.16 q8, q8
3033 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3034 ; SOFT-NEXT: vrev64.16 q8, q8
3035 ; SOFT-NEXT: vst1.64 {d16, d17}, [r5]
3036 ; SOFT-NEXT: add sp, sp, #16
3037 ; SOFT-NEXT: pop {r4, r5, r11, pc}
3039 ; HARD-LABEL: test_v8i16_f128:
3041 ; HARD-NEXT: .save {r4, r5, r11, lr}
3042 ; HARD-NEXT: push {r4, r5, r11, lr}
3043 ; HARD-NEXT: .pad #16
3044 ; HARD-NEXT: sub sp, sp, #16
3045 ; HARD-NEXT: ldr r4, [r0]
3046 ; HARD-NEXT: mov r5, r1
3047 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
3048 ; HARD-NEXT: mov r0, r4
3049 ; HARD-NEXT: str r4, [sp]
3050 ; HARD-NEXT: stmib sp, {r1, r2, r3}
3051 ; HARD-NEXT: bl __addtf3
3052 ; HARD-NEXT: bl test_v8i16_f128_helper
3053 ; HARD-NEXT: vrev64.16 q8, q0
3054 ; HARD-NEXT: vadd.i16 q8, q8, q8
3055 ; HARD-NEXT: vrev64.16 q8, q8
3056 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
3057 ; HARD-NEXT: add sp, sp, #16
3058 ; HARD-NEXT: pop {r4, r5, r11, pc}
; IR under test: load, self-add, helper call, self-add, store.
3059 %1 = load fp128, ptr %p
3060 %2 = fadd fp128 %1, %1
3061 %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
3062 %4 = add <8 x i16> %3, %3
3063 store <8 x i16> %4, ptr %q
; test_v8i16_v2f64: loads a <2 x double> from %p, adds it to itself, passes the
; sum to test_v8i16_v2f64_helper, adds the returned <8 x i16> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <2 x double> add is expected as two vadd.f64 on d registers; the soft
; configuration then moves the argument into r0-r3, while the hard one writes
; it directly to d0/d1. The <8 x i16> add is expected between two vrev64.16.
3067 declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
3068 define void @test_v8i16_v2f64(ptr %p, ptr %q) {
3069 ; SOFT-LABEL: test_v8i16_v2f64:
3071 ; SOFT-NEXT: .save {r4, lr}
3072 ; SOFT-NEXT: push {r4, lr}
3073 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3074 ; SOFT-NEXT: mov r4, r1
3075 ; SOFT-NEXT: vadd.f64 d18, d16, d16
3076 ; SOFT-NEXT: vadd.f64 d16, d17, d17
3077 ; SOFT-NEXT: vmov r1, r0, d18
3078 ; SOFT-NEXT: vmov r3, r2, d16
3079 ; SOFT-NEXT: bl test_v8i16_v2f64_helper
3080 ; SOFT-NEXT: vmov d17, r3, r2
3081 ; SOFT-NEXT: vmov d16, r1, r0
3082 ; SOFT-NEXT: vrev64.16 q8, q8
3083 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3084 ; SOFT-NEXT: vrev64.16 q8, q8
3085 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3086 ; SOFT-NEXT: pop {r4, pc}
3088 ; HARD-LABEL: test_v8i16_v2f64:
3090 ; HARD-NEXT: .save {r4, lr}
3091 ; HARD-NEXT: push {r4, lr}
3092 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3093 ; HARD-NEXT: mov r4, r1
3094 ; HARD-NEXT: vadd.f64 d1, d17, d17
3095 ; HARD-NEXT: vadd.f64 d0, d16, d16
3096 ; HARD-NEXT: bl test_v8i16_v2f64_helper
3097 ; HARD-NEXT: vrev64.16 q8, q0
3098 ; HARD-NEXT: vadd.i16 q8, q8, q8
3099 ; HARD-NEXT: vrev64.16 q8, q8
3100 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3101 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3102 %1 = load <2 x double>, ptr %p
3103 %2 = fadd <2 x double> %1, %1
3104 %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
3105 %4 = add <8 x i16> %3, %3
3106 store <8 x i16> %4, ptr %q
; test_v8i16_v2i64: loads a <2 x i64> from %p, adds it to itself, passes the
; sum to test_v8i16_v2i64_helper, adds the returned <8 x i16> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <2 x i64> add is expected as a single vadd.i64 with no vrev64 around it;
; the <8 x i16> add on the returned value is expected between two vrev64.16.
3110 declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
3111 define void @test_v8i16_v2i64(ptr %p, ptr %q) {
3112 ; SOFT-LABEL: test_v8i16_v2i64:
3114 ; SOFT-NEXT: .save {r4, lr}
3115 ; SOFT-NEXT: push {r4, lr}
3116 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3117 ; SOFT-NEXT: mov r4, r1
3118 ; SOFT-NEXT: vadd.i64 q8, q8, q8
3119 ; SOFT-NEXT: vmov r1, r0, d16
3120 ; SOFT-NEXT: vmov r3, r2, d17
3121 ; SOFT-NEXT: bl test_v8i16_v2i64_helper
3122 ; SOFT-NEXT: vmov d17, r3, r2
3123 ; SOFT-NEXT: vmov d16, r1, r0
3124 ; SOFT-NEXT: vrev64.16 q8, q8
3125 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3126 ; SOFT-NEXT: vrev64.16 q8, q8
3127 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3128 ; SOFT-NEXT: pop {r4, pc}
3130 ; HARD-LABEL: test_v8i16_v2i64:
3132 ; HARD-NEXT: .save {r4, lr}
3133 ; HARD-NEXT: push {r4, lr}
3134 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3135 ; HARD-NEXT: mov r4, r1
3136 ; HARD-NEXT: vadd.i64 q0, q8, q8
3137 ; HARD-NEXT: bl test_v8i16_v2i64_helper
3138 ; HARD-NEXT: vrev64.16 q8, q0
3139 ; HARD-NEXT: vadd.i16 q8, q8, q8
3140 ; HARD-NEXT: vrev64.16 q8, q8
3141 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3142 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3143 %1 = load <2 x i64>, ptr %p
3144 %2 = add <2 x i64> %1, %1
3145 %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
3146 %4 = add <8 x i16> %3, %3
3147 store <8 x i16> %4, ptr %q
; test_v8i16_v4f32: loads a <4 x float> from %p, adds it to itself, passes the
; sum to test_v8i16_v4f32_helper, adds the returned <8 x i16> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <4 x float> add is expected between two vrev64.32, and the <8 x i16> add
; on the returned value between two vrev64.16.
3151 declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
3152 define void @test_v8i16_v4f32(ptr %p, ptr %q) {
3153 ; SOFT-LABEL: test_v8i16_v4f32:
3155 ; SOFT-NEXT: .save {r4, lr}
3156 ; SOFT-NEXT: push {r4, lr}
3157 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3158 ; SOFT-NEXT: mov r4, r1
3159 ; SOFT-NEXT: vrev64.32 q8, q8
3160 ; SOFT-NEXT: vadd.f32 q8, q8, q8
3161 ; SOFT-NEXT: vrev64.32 q8, q8
3162 ; SOFT-NEXT: vmov r1, r0, d16
3163 ; SOFT-NEXT: vmov r3, r2, d17
3164 ; SOFT-NEXT: bl test_v8i16_v4f32_helper
3165 ; SOFT-NEXT: vmov d17, r3, r2
3166 ; SOFT-NEXT: vmov d16, r1, r0
3167 ; SOFT-NEXT: vrev64.16 q8, q8
3168 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3169 ; SOFT-NEXT: vrev64.16 q8, q8
3170 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3171 ; SOFT-NEXT: pop {r4, pc}
3173 ; HARD-LABEL: test_v8i16_v4f32:
3175 ; HARD-NEXT: .save {r4, lr}
3176 ; HARD-NEXT: push {r4, lr}
3177 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3178 ; HARD-NEXT: mov r4, r1
3179 ; HARD-NEXT: vrev64.32 q8, q8
3180 ; HARD-NEXT: vadd.f32 q8, q8, q8
3181 ; HARD-NEXT: vrev64.32 q0, q8
3182 ; HARD-NEXT: bl test_v8i16_v4f32_helper
3183 ; HARD-NEXT: vrev64.16 q8, q0
3184 ; HARD-NEXT: vadd.i16 q8, q8, q8
3185 ; HARD-NEXT: vrev64.16 q8, q8
3186 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3187 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3188 %1 = load <4 x float>, ptr %p
3189 %2 = fadd <4 x float> %1, %1
3190 %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
3191 %4 = add <8 x i16> %3, %3
3192 store <8 x i16> %4, ptr %q
; test_v8i16_v4i32: loads a <4 x i32> from %p, adds it to itself, passes the
; sum to test_v8i16_v4i32_helper, adds the returned <8 x i16> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <4 x i32> add is expected between two vrev64.32, and the <8 x i16> add
; on the returned value between two vrev64.16.
3196 declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
3197 define void @test_v8i16_v4i32(ptr %p, ptr %q) {
3198 ; SOFT-LABEL: test_v8i16_v4i32:
3200 ; SOFT-NEXT: .save {r4, lr}
3201 ; SOFT-NEXT: push {r4, lr}
3202 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3203 ; SOFT-NEXT: mov r4, r1
3204 ; SOFT-NEXT: vrev64.32 q8, q8
3205 ; SOFT-NEXT: vadd.i32 q8, q8, q8
3206 ; SOFT-NEXT: vrev64.32 q8, q8
3207 ; SOFT-NEXT: vmov r1, r0, d16
3208 ; SOFT-NEXT: vmov r3, r2, d17
3209 ; SOFT-NEXT: bl test_v8i16_v4i32_helper
3210 ; SOFT-NEXT: vmov d17, r3, r2
3211 ; SOFT-NEXT: vmov d16, r1, r0
3212 ; SOFT-NEXT: vrev64.16 q8, q8
3213 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3214 ; SOFT-NEXT: vrev64.16 q8, q8
3215 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3216 ; SOFT-NEXT: pop {r4, pc}
3218 ; HARD-LABEL: test_v8i16_v4i32:
3220 ; HARD-NEXT: .save {r4, lr}
3221 ; HARD-NEXT: push {r4, lr}
3222 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3223 ; HARD-NEXT: mov r4, r1
3224 ; HARD-NEXT: vrev64.32 q8, q8
3225 ; HARD-NEXT: vadd.i32 q8, q8, q8
3226 ; HARD-NEXT: vrev64.32 q0, q8
3227 ; HARD-NEXT: bl test_v8i16_v4i32_helper
3228 ; HARD-NEXT: vrev64.16 q8, q0
3229 ; HARD-NEXT: vadd.i16 q8, q8, q8
3230 ; HARD-NEXT: vrev64.16 q8, q8
3231 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3232 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3233 %1 = load <4 x i32>, ptr %p
3234 %2 = add <4 x i32> %1, %1
3235 %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
3236 %4 = add <8 x i16> %3, %3
3237 store <8 x i16> %4, ptr %q
; test_v8i16_v16i8: loads a <16 x i8> from %p, adds it to itself, passes the
; sum to test_v8i16_v16i8_helper, adds the returned <8 x i16> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <16 x i8> add is expected between two vrev64.8, and the <8 x i16> add on
; the returned value between two vrev64.16.
3241 declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
3242 define void @test_v8i16_v16i8(ptr %p, ptr %q) {
3243 ; SOFT-LABEL: test_v8i16_v16i8:
3245 ; SOFT-NEXT: .save {r4, lr}
3246 ; SOFT-NEXT: push {r4, lr}
3247 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3248 ; SOFT-NEXT: mov r4, r1
3249 ; SOFT-NEXT: vrev64.8 q8, q8
3250 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3251 ; SOFT-NEXT: vrev64.8 q8, q8
3252 ; SOFT-NEXT: vmov r1, r0, d16
3253 ; SOFT-NEXT: vmov r3, r2, d17
3254 ; SOFT-NEXT: bl test_v8i16_v16i8_helper
3255 ; SOFT-NEXT: vmov d17, r3, r2
3256 ; SOFT-NEXT: vmov d16, r1, r0
3257 ; SOFT-NEXT: vrev64.16 q8, q8
3258 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3259 ; SOFT-NEXT: vrev64.16 q8, q8
3260 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3261 ; SOFT-NEXT: pop {r4, pc}
3263 ; HARD-LABEL: test_v8i16_v16i8:
3265 ; HARD-NEXT: .save {r4, lr}
3266 ; HARD-NEXT: push {r4, lr}
3267 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3268 ; HARD-NEXT: mov r4, r1
3269 ; HARD-NEXT: vrev64.8 q8, q8
3270 ; HARD-NEXT: vadd.i8 q8, q8, q8
3271 ; HARD-NEXT: vrev64.8 q0, q8
3272 ; HARD-NEXT: bl test_v8i16_v16i8_helper
3273 ; HARD-NEXT: vrev64.16 q8, q0
3274 ; HARD-NEXT: vadd.i16 q8, q8, q8
3275 ; HARD-NEXT: vrev64.16 q8, q8
3276 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3277 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3278 %1 = load <16 x i8>, ptr %p
3279 %2 = add <16 x i8> %1, %1
3280 %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
3281 %4 = add <8 x i16> %3, %3
3282 store <8 x i16> %4, ptr %q
; test_v16i8_f128: loads an fp128 from %p, adds it to itself, passes the sum to
; test_v16i8_f128_helper, adds the returned <16 x i8> to itself and stores the
; result to %q. Expected output is checked for both float-ABI configurations
; (soft and hard) declared in the RUN lines at the top of the file.
; In both configurations the fp128 add is expected to be a call to __addtf3,
; and the <16 x i8> add is expected to be bracketed by vrev64.8.
3286 declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
3287 define void @test_v16i8_f128(ptr %p, ptr %q) {
3288 ; SOFT-LABEL: test_v16i8_f128:
3290 ; SOFT-NEXT: .save {r4, r5, r11, lr}
3291 ; SOFT-NEXT: push {r4, r5, r11, lr}
3292 ; SOFT-NEXT: .pad #16
3293 ; SOFT-NEXT: sub sp, sp, #16
3294 ; SOFT-NEXT: ldr r4, [r0]
3295 ; SOFT-NEXT: mov r5, r1
3296 ; SOFT-NEXT: ldmib r0, {r1, r2, r3}
3297 ; SOFT-NEXT: mov r0, r4
3298 ; SOFT-NEXT: str r4, [sp]
3299 ; SOFT-NEXT: stmib sp, {r1, r2, r3}
3300 ; SOFT-NEXT: bl __addtf3
3301 ; SOFT-NEXT: bl test_v16i8_f128_helper
3302 ; SOFT-NEXT: vmov d17, r3, r2
3303 ; SOFT-NEXT: vmov d16, r1, r0
3304 ; SOFT-NEXT: vrev64.8 q8, q8
3305 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3306 ; SOFT-NEXT: vrev64.8 q8, q8
3307 ; SOFT-NEXT: vst1.64 {d16, d17}, [r5]
3308 ; SOFT-NEXT: add sp, sp, #16
3309 ; SOFT-NEXT: pop {r4, r5, r11, pc}
3311 ; HARD-LABEL: test_v16i8_f128:
3313 ; HARD-NEXT: .save {r4, r5, r11, lr}
3314 ; HARD-NEXT: push {r4, r5, r11, lr}
3315 ; HARD-NEXT: .pad #16
3316 ; HARD-NEXT: sub sp, sp, #16
3317 ; HARD-NEXT: ldr r4, [r0]
3318 ; HARD-NEXT: mov r5, r1
3319 ; HARD-NEXT: ldmib r0, {r1, r2, r3}
3320 ; HARD-NEXT: mov r0, r4
3321 ; HARD-NEXT: str r4, [sp]
3322 ; HARD-NEXT: stmib sp, {r1, r2, r3}
3323 ; HARD-NEXT: bl __addtf3
3324 ; HARD-NEXT: bl test_v16i8_f128_helper
3325 ; HARD-NEXT: vrev64.8 q8, q0
3326 ; HARD-NEXT: vadd.i8 q8, q8, q8
3327 ; HARD-NEXT: vrev64.8 q8, q8
3328 ; HARD-NEXT: vst1.64 {d16, d17}, [r5]
3329 ; HARD-NEXT: add sp, sp, #16
3330 ; HARD-NEXT: pop {r4, r5, r11, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3331 %1 = load fp128, ptr %p
3332 %2 = fadd fp128 %1, %1
3333 %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
3334 %4 = add <16 x i8> %3, %3
3335 store <16 x i8> %4, ptr %q
; test_v16i8_v2f64: loads a <2 x double> from %p, adds it to itself, passes the
; sum to test_v16i8_v2f64_helper, adds the returned <16 x i8> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <2 x double> add is expected as two vadd.f64 on d registers; the soft
; configuration then moves the argument into r0-r3, while the hard one writes
; it directly to d0/d1. The <16 x i8> add is expected between two vrev64.8.
3339 declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
3340 define void @test_v16i8_v2f64(ptr %p, ptr %q) {
3341 ; SOFT-LABEL: test_v16i8_v2f64:
3343 ; SOFT-NEXT: .save {r4, lr}
3344 ; SOFT-NEXT: push {r4, lr}
3345 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3346 ; SOFT-NEXT: mov r4, r1
3347 ; SOFT-NEXT: vadd.f64 d18, d16, d16
3348 ; SOFT-NEXT: vadd.f64 d16, d17, d17
3349 ; SOFT-NEXT: vmov r1, r0, d18
3350 ; SOFT-NEXT: vmov r3, r2, d16
3351 ; SOFT-NEXT: bl test_v16i8_v2f64_helper
3352 ; SOFT-NEXT: vmov d17, r3, r2
3353 ; SOFT-NEXT: vmov d16, r1, r0
3354 ; SOFT-NEXT: vrev64.8 q8, q8
3355 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3356 ; SOFT-NEXT: vrev64.8 q8, q8
3357 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3358 ; SOFT-NEXT: pop {r4, pc}
3360 ; HARD-LABEL: test_v16i8_v2f64:
3362 ; HARD-NEXT: .save {r4, lr}
3363 ; HARD-NEXT: push {r4, lr}
3364 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3365 ; HARD-NEXT: mov r4, r1
3366 ; HARD-NEXT: vadd.f64 d1, d17, d17
3367 ; HARD-NEXT: vadd.f64 d0, d16, d16
3368 ; HARD-NEXT: bl test_v16i8_v2f64_helper
3369 ; HARD-NEXT: vrev64.8 q8, q0
3370 ; HARD-NEXT: vadd.i8 q8, q8, q8
3371 ; HARD-NEXT: vrev64.8 q8, q8
3372 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3373 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3374 %1 = load <2 x double>, ptr %p
3375 %2 = fadd <2 x double> %1, %1
3376 %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
3377 %4 = add <16 x i8> %3, %3
3378 store <16 x i8> %4, ptr %q
; test_v16i8_v2i64: loads a <2 x i64> from %p, adds it to itself, passes the
; sum to test_v16i8_v2i64_helper, adds the returned <16 x i8> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <2 x i64> add is expected as a single vadd.i64 with no vrev64 around it;
; the <16 x i8> add on the returned value is expected between two vrev64.8.
3382 declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
3383 define void @test_v16i8_v2i64(ptr %p, ptr %q) {
3384 ; SOFT-LABEL: test_v16i8_v2i64:
3386 ; SOFT-NEXT: .save {r4, lr}
3387 ; SOFT-NEXT: push {r4, lr}
3388 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3389 ; SOFT-NEXT: mov r4, r1
3390 ; SOFT-NEXT: vadd.i64 q8, q8, q8
3391 ; SOFT-NEXT: vmov r1, r0, d16
3392 ; SOFT-NEXT: vmov r3, r2, d17
3393 ; SOFT-NEXT: bl test_v16i8_v2i64_helper
3394 ; SOFT-NEXT: vmov d17, r3, r2
3395 ; SOFT-NEXT: vmov d16, r1, r0
3396 ; SOFT-NEXT: vrev64.8 q8, q8
3397 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3398 ; SOFT-NEXT: vrev64.8 q8, q8
3399 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3400 ; SOFT-NEXT: pop {r4, pc}
3402 ; HARD-LABEL: test_v16i8_v2i64:
3404 ; HARD-NEXT: .save {r4, lr}
3405 ; HARD-NEXT: push {r4, lr}
3406 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3407 ; HARD-NEXT: mov r4, r1
3408 ; HARD-NEXT: vadd.i64 q0, q8, q8
3409 ; HARD-NEXT: bl test_v16i8_v2i64_helper
3410 ; HARD-NEXT: vrev64.8 q8, q0
3411 ; HARD-NEXT: vadd.i8 q8, q8, q8
3412 ; HARD-NEXT: vrev64.8 q8, q8
3413 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3414 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3415 %1 = load <2 x i64>, ptr %p
3416 %2 = add <2 x i64> %1, %1
3417 %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
3418 %4 = add <16 x i8> %3, %3
3419 store <16 x i8> %4, ptr %q
; test_v16i8_v4f32: loads a <4 x float> from %p, adds it to itself, passes the
; sum to test_v16i8_v4f32_helper, adds the returned <16 x i8> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <4 x float> add is expected between two vrev64.32, and the <16 x i8> add
; on the returned value between two vrev64.8.
3423 declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
3424 define void @test_v16i8_v4f32(ptr %p, ptr %q) {
3425 ; SOFT-LABEL: test_v16i8_v4f32:
3427 ; SOFT-NEXT: .save {r4, lr}
3428 ; SOFT-NEXT: push {r4, lr}
3429 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3430 ; SOFT-NEXT: mov r4, r1
3431 ; SOFT-NEXT: vrev64.32 q8, q8
3432 ; SOFT-NEXT: vadd.f32 q8, q8, q8
3433 ; SOFT-NEXT: vrev64.32 q8, q8
3434 ; SOFT-NEXT: vmov r1, r0, d16
3435 ; SOFT-NEXT: vmov r3, r2, d17
3436 ; SOFT-NEXT: bl test_v16i8_v4f32_helper
3437 ; SOFT-NEXT: vmov d17, r3, r2
3438 ; SOFT-NEXT: vmov d16, r1, r0
3439 ; SOFT-NEXT: vrev64.8 q8, q8
3440 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3441 ; SOFT-NEXT: vrev64.8 q8, q8
3442 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3443 ; SOFT-NEXT: pop {r4, pc}
3445 ; HARD-LABEL: test_v16i8_v4f32:
3447 ; HARD-NEXT: .save {r4, lr}
3448 ; HARD-NEXT: push {r4, lr}
3449 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3450 ; HARD-NEXT: mov r4, r1
3451 ; HARD-NEXT: vrev64.32 q8, q8
3452 ; HARD-NEXT: vadd.f32 q8, q8, q8
3453 ; HARD-NEXT: vrev64.32 q0, q8
3454 ; HARD-NEXT: bl test_v16i8_v4f32_helper
3455 ; HARD-NEXT: vrev64.8 q8, q0
3456 ; HARD-NEXT: vadd.i8 q8, q8, q8
3457 ; HARD-NEXT: vrev64.8 q8, q8
3458 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3459 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3460 %1 = load <4 x float>, ptr %p
3461 %2 = fadd <4 x float> %1, %1
3462 %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
3463 %4 = add <16 x i8> %3, %3
3464 store <16 x i8> %4, ptr %q
; test_v16i8_v4i32: loads a <4 x i32> from %p, adds it to itself, passes the
; sum to test_v16i8_v4i32_helper, adds the returned <16 x i8> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <4 x i32> add is expected between two vrev64.32, and the <16 x i8> add
; on the returned value between two vrev64.8.
3468 declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
3469 define void @test_v16i8_v4i32(ptr %p, ptr %q) {
3470 ; SOFT-LABEL: test_v16i8_v4i32:
3472 ; SOFT-NEXT: .save {r4, lr}
3473 ; SOFT-NEXT: push {r4, lr}
3474 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3475 ; SOFT-NEXT: mov r4, r1
3476 ; SOFT-NEXT: vrev64.32 q8, q8
3477 ; SOFT-NEXT: vadd.i32 q8, q8, q8
3478 ; SOFT-NEXT: vrev64.32 q8, q8
3479 ; SOFT-NEXT: vmov r1, r0, d16
3480 ; SOFT-NEXT: vmov r3, r2, d17
3481 ; SOFT-NEXT: bl test_v16i8_v4i32_helper
3482 ; SOFT-NEXT: vmov d17, r3, r2
3483 ; SOFT-NEXT: vmov d16, r1, r0
3484 ; SOFT-NEXT: vrev64.8 q8, q8
3485 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3486 ; SOFT-NEXT: vrev64.8 q8, q8
3487 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3488 ; SOFT-NEXT: pop {r4, pc}
3490 ; HARD-LABEL: test_v16i8_v4i32:
3492 ; HARD-NEXT: .save {r4, lr}
3493 ; HARD-NEXT: push {r4, lr}
3494 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3495 ; HARD-NEXT: mov r4, r1
3496 ; HARD-NEXT: vrev64.32 q8, q8
3497 ; HARD-NEXT: vadd.i32 q8, q8, q8
3498 ; HARD-NEXT: vrev64.32 q0, q8
3499 ; HARD-NEXT: bl test_v16i8_v4i32_helper
3500 ; HARD-NEXT: vrev64.8 q8, q0
3501 ; HARD-NEXT: vadd.i8 q8, q8, q8
3502 ; HARD-NEXT: vrev64.8 q8, q8
3503 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3504 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3505 %1 = load <4 x i32>, ptr %p
3506 %2 = add <4 x i32> %1, %1
3507 %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
3508 %4 = add <16 x i8> %3, %3
3509 store <16 x i8> %4, ptr %q
; test_v16i8_v8i16: loads an <8 x i16> from %p, adds it to itself, passes the
; sum to test_v16i8_v8i16_helper, adds the returned <16 x i8> to itself and
; stores the result to %q. Checked for both the soft and hard float-ABI
; configurations from the RUN lines at the top of the file.
; The <8 x i16> add is expected between two vrev64.16, and the <16 x i8> add
; on the returned value between two vrev64.8.
3513 declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
3514 define void @test_v16i8_v8i16(ptr %p, ptr %q) {
3515 ; SOFT-LABEL: test_v16i8_v8i16:
3517 ; SOFT-NEXT: .save {r4, lr}
3518 ; SOFT-NEXT: push {r4, lr}
3519 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0]
3520 ; SOFT-NEXT: mov r4, r1
3521 ; SOFT-NEXT: vrev64.16 q8, q8
3522 ; SOFT-NEXT: vadd.i16 q8, q8, q8
3523 ; SOFT-NEXT: vrev64.16 q8, q8
3524 ; SOFT-NEXT: vmov r1, r0, d16
3525 ; SOFT-NEXT: vmov r3, r2, d17
3526 ; SOFT-NEXT: bl test_v16i8_v8i16_helper
3527 ; SOFT-NEXT: vmov d17, r3, r2
3528 ; SOFT-NEXT: vmov d16, r1, r0
3529 ; SOFT-NEXT: vrev64.8 q8, q8
3530 ; SOFT-NEXT: vadd.i8 q8, q8, q8
3531 ; SOFT-NEXT: vrev64.8 q8, q8
3532 ; SOFT-NEXT: vst1.64 {d16, d17}, [r4]
3533 ; SOFT-NEXT: pop {r4, pc}
3535 ; HARD-LABEL: test_v16i8_v8i16:
3537 ; HARD-NEXT: .save {r4, lr}
3538 ; HARD-NEXT: push {r4, lr}
3539 ; HARD-NEXT: vld1.64 {d16, d17}, [r0]
3540 ; HARD-NEXT: mov r4, r1
3541 ; HARD-NEXT: vrev64.16 q8, q8
3542 ; HARD-NEXT: vadd.i16 q8, q8, q8
3543 ; HARD-NEXT: vrev64.16 q0, q8
3544 ; HARD-NEXT: bl test_v16i8_v8i16_helper
3545 ; HARD-NEXT: vrev64.8 q8, q0
3546 ; HARD-NEXT: vadd.i8 q8, q8, q8
3547 ; HARD-NEXT: vrev64.8 q8, q8
3548 ; HARD-NEXT: vst1.64 {d16, d17}, [r4]
3549 ; HARD-NEXT: pop {r4, pc}
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3550 %1 = load <8 x i16>, ptr %p
3551 %2 = add <8 x i16> %1, %1
3552 %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
3553 %4 = add <16 x i8> %3, %3
3554 store <16 x i8> %4, ptr %q