1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; Plain i32 add reduction of <4 x i32> %x via llvm.vector.reduce.add.v4i32.
; The assertions expect a single vaddv.u32 into r0.
4 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x) {
5 ; CHECK-LABEL: add_v4i32_v4i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vaddv.u32 r0, q0
10 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
; Zero-extends <4 x i32> to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect one long reduction, vaddlv.u32, writing r0/r1.
14 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
15 ; CHECK-LABEL: add_v4i32_v4i64_zext:
16 ; CHECK: @ %bb.0: @ %entry
17 ; CHECK-NEXT: vaddlv.u32 r0, r1, q0
20 %xx = zext <4 x i32> %x to <4 x i64>
21 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Sign-extends <4 x i32> to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect the signed long reduction, vaddlv.s32, writing r0/r1.
25 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
26 ; CHECK-LABEL: add_v4i32_v4i64_sext:
27 ; CHECK: @ %bb.0: @ %entry
28 ; CHECK-NEXT: vaddlv.s32 r0, r1, q0
31 %xx = sext <4 x i32> %x to <4 x i64>
32 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Zero-extends <2 x i32> to <2 x i64>, then reduces with reduce.add.v2i64.
; No vaddlv is expected: the vector is masked with #0xffffffff per i64 lane,
; both halves are moved to core registers and summed with adds/adcs.
36 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) {
37 ; CHECK-LABEL: add_v2i32_v2i64_zext:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vmov.i64 q1, #0xffffffff
40 ; CHECK-NEXT: vand q0, q0, q1
41 ; CHECK-NEXT: vmov r0, r1, d1
42 ; CHECK-NEXT: vmov r2, r3, d0
43 ; CHECK-NEXT: adds r0, r0, r2
44 ; CHECK-NEXT: adcs r1, r3
47 %xx = zext <2 x i32> %x to <2 x i64>
48 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Sign-extends <2 x i32> to <2 x i64>, then reduces with reduce.add.v2i64.
; The expected code is scalar: s0 and s2 are moved to r0/r2 and added as a
; 64-bit value, with the high words formed by "asr #31" of each element.
52 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x) {
53 ; CHECK-LABEL: add_v2i32_v2i64_sext:
54 ; CHECK: @ %bb.0: @ %entry
55 ; CHECK-NEXT: vmov r0, s0
56 ; CHECK-NEXT: vmov r2, s2
57 ; CHECK-NEXT: asrs r1, r0, #31
58 ; CHECK-NEXT: adds r0, r0, r2
59 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
62 %xx = sext <2 x i32> %x to <2 x i64>
63 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Zero-extends <8 x i16> to <8 x i32>, then reduces with reduce.add.v8i32.
; The assertions expect the extend to fold into a single vaddv.u16 into r0.
67 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x) {
68 ; CHECK-LABEL: add_v8i16_v8i32_zext:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: vaddv.u16 r0, q0
73 %xx = zext <8 x i16> %x to <8 x i32>
74 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; Sign-extends <8 x i16> to <8 x i32>, then reduces with reduce.add.v8i32.
; The assertions expect the extend to fold into a single vaddv.s16 into r0.
78 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
79 ; CHECK-LABEL: add_v8i16_v8i32_sext:
80 ; CHECK: @ %bb.0: @ %entry
81 ; CHECK-NEXT: vaddv.s16 r0, q0
84 %xx = sext <8 x i16> %x to <8 x i32>
85 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; Zero-extends a <4 x i16> argument to <4 x i32>, then reduces with reduce.add.v4i32.
; The assertions expect an explicit vmovlb.u16 widening followed by vaddv.u32.
89 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x) {
90 ; CHECK-LABEL: add_v4i16_v4i32_zext:
91 ; CHECK: @ %bb.0: @ %entry
92 ; CHECK-NEXT: vmovlb.u16 q0, q0
93 ; CHECK-NEXT: vaddv.u32 r0, q0
96 %xx = zext <4 x i16> %x to <4 x i32>
97 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; Sign-extends a <4 x i16> argument to <4 x i32>, then reduces with reduce.add.v4i32.
; The assertions expect vmovlb.s16 for the extend and then vaddv.u32 on the widened lanes.
101 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x) {
102 ; CHECK-LABEL: add_v4i16_v4i32_sext:
103 ; CHECK: @ %bb.0: @ %entry
104 ; CHECK-NEXT: vmovlb.s16 q0, q0
105 ; CHECK-NEXT: vaddv.u32 r0, q0
108 %xx = sext <4 x i16> %x to <4 x i32>
109 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; Plain i16 add reduction of <8 x i16> %x; the return value is marked zeroext.
; The assertions expect vaddv.u16 followed by uxth on r0.
113 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) {
114 ; CHECK-LABEL: add_v8i16_v8i16:
115 ; CHECK: @ %bb.0: @ %entry
116 ; CHECK-NEXT: vaddv.u16 r0, q0
117 ; CHECK-NEXT: uxth r0, r0
120 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
; Zero-extends <8 x i16> to <8 x i64>, then reduces with reduce.add.v8i64.
; No single reduction instruction is expected: lanes are extracted in pairs with
; vmov.u16, masked with #0xffff, and accumulated into r0/r1 with add/adds/adc.
124 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
125 ; CHECK-LABEL: add_v8i16_v8i64_zext:
126 ; CHECK: @ %bb.0: @ %entry
127 ; CHECK-NEXT: vmov.u16 r0, q0[1]
128 ; CHECK-NEXT: vmov.u16 r1, q0[0]
129 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
130 ; CHECK-NEXT: vmov.i64 q1, #0xffff
131 ; CHECK-NEXT: vand q2, q2, q1
132 ; CHECK-NEXT: vmov.u16 r3, q0[2]
133 ; CHECK-NEXT: vmov r0, s10
134 ; CHECK-NEXT: vmov r1, r2, d4
135 ; CHECK-NEXT: add r0, r1
136 ; CHECK-NEXT: vmov.u16 r1, q0[3]
137 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
138 ; CHECK-NEXT: vmov.u16 r3, q0[4]
139 ; CHECK-NEXT: vand q2, q2, q1
140 ; CHECK-NEXT: vmov r1, s8
141 ; CHECK-NEXT: add r0, r1
142 ; CHECK-NEXT: vmov r1, s10
143 ; CHECK-NEXT: add r0, r1
144 ; CHECK-NEXT: vmov.u16 r1, q0[5]
145 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
146 ; CHECK-NEXT: vand q2, q2, q1
147 ; CHECK-NEXT: vmov r1, s8
148 ; CHECK-NEXT: add r0, r1
149 ; CHECK-NEXT: vmov r1, r3, d5
150 ; CHECK-NEXT: adds r0, r0, r1
151 ; CHECK-NEXT: adc.w r1, r2, r3
152 ; CHECK-NEXT: vmov.u16 r2, q0[7]
153 ; CHECK-NEXT: vmov.u16 r3, q0[6]
154 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
155 ; CHECK-NEXT: vand q0, q0, q1
156 ; CHECK-NEXT: vmov r2, r3, d0
157 ; CHECK-NEXT: adds r0, r0, r2
158 ; CHECK-NEXT: adcs r1, r3
159 ; CHECK-NEXT: vmov r2, r3, d1
160 ; CHECK-NEXT: adds r0, r0, r2
161 ; CHECK-NEXT: adcs r1, r3
164 %xx = zext <8 x i16> %x to <8 x i64>
165 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Sign-extends <8 x i16> to <8 x i64>, then reduces with reduce.add.v8i64.
; The expected code is fully scalar: each lane is read with vmov.s16 and added
; into r0/r1 with adds / adc.w (high word taken as "asr #31" of the lane).
169 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
170 ; CHECK-LABEL: add_v8i16_v8i64_sext:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vmov.s16 r0, q0[0]
173 ; CHECK-NEXT: vmov.s16 r2, q0[1]
174 ; CHECK-NEXT: asrs r1, r0, #31
175 ; CHECK-NEXT: adds r0, r0, r2
176 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
177 ; CHECK-NEXT: vmov.s16 r2, q0[2]
178 ; CHECK-NEXT: adds r0, r0, r2
179 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
180 ; CHECK-NEXT: vmov.s16 r2, q0[3]
181 ; CHECK-NEXT: adds r0, r0, r2
182 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
183 ; CHECK-NEXT: vmov.s16 r2, q0[4]
184 ; CHECK-NEXT: adds r0, r0, r2
185 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
186 ; CHECK-NEXT: vmov.s16 r2, q0[5]
187 ; CHECK-NEXT: adds r0, r0, r2
188 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
189 ; CHECK-NEXT: vmov.s16 r2, q0[6]
190 ; CHECK-NEXT: adds r0, r0, r2
191 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
192 ; CHECK-NEXT: vmov.s16 r2, q0[7]
193 ; CHECK-NEXT: adds r0, r0, r2
194 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
197 %xx = sext <8 x i16> %x to <8 x i64>
198 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Zero-extends a <4 x i16> argument to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect vmovlb.u16 followed by the long reduction vaddlv.u32 into r0/r1.
202 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x) {
203 ; CHECK-LABEL: add_v4i16_v4i64_zext:
204 ; CHECK: @ %bb.0: @ %entry
205 ; CHECK-NEXT: vmovlb.u16 q0, q0
206 ; CHECK-NEXT: vaddlv.u32 r0, r1, q0
209 %xx = zext <4 x i16> %x to <4 x i64>
210 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Sign-extends a <4 x i16> argument to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect vmovlb.s16 followed by the long reduction vaddlv.s32 into r0/r1.
214 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
215 ; CHECK-LABEL: add_v4i16_v4i64_sext:
216 ; CHECK: @ %bb.0: @ %entry
217 ; CHECK-NEXT: vmovlb.s16 q0, q0
218 ; CHECK-NEXT: vaddlv.s32 r0, r1, q0
221 %xx = sext <4 x i16> %x to <4 x i64>
222 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Zero-extends <2 x i16> to <2 x i64>, then reduces with reduce.add.v2i64.
; The expected code masks each i64 lane with #0xffff, moves the values to core
; registers and uses a single 32-bit add for the low word (high word comes from d0).
226 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
227 ; CHECK-LABEL: add_v2i16_v2i64_zext:
228 ; CHECK: @ %bb.0: @ %entry
229 ; CHECK-NEXT: vmov.i64 q1, #0xffff
230 ; CHECK-NEXT: vand q0, q0, q1
231 ; CHECK-NEXT: vmov r0, s2
232 ; CHECK-NEXT: vmov r2, r1, d0
233 ; CHECK-NEXT: add r0, r2
236 %xx = zext <2 x i16> %x to <2 x i64>
237 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Sign-extends <2 x i16> to <2 x i64>, then reduces with reduce.add.v2i64.
; The expected code is scalar: s0/s2 are moved to r0/r2, sign-extended with
; sxth, and added as a 64-bit value using "asr #31" for the high words.
241 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x) {
242 ; CHECK-LABEL: add_v2i16_v2i64_sext:
243 ; CHECK: @ %bb.0: @ %entry
244 ; CHECK-NEXT: vmov r0, s0
245 ; CHECK-NEXT: vmov r2, s2
246 ; CHECK-NEXT: sxth r0, r0
247 ; CHECK-NEXT: asrs r1, r0, #31
248 ; CHECK-NEXT: sxth r2, r2
249 ; CHECK-NEXT: adds r0, r0, r2
250 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
253 %xx = sext <2 x i16> %x to <2 x i64>
254 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Zero-extends <16 x i8> to <16 x i32>, then reduces with reduce.add.v16i32.
; The assertions expect the extend to fold into a single vaddv.u8 into r0.
258 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x) {
259 ; CHECK-LABEL: add_v16i8_v16i32_zext:
260 ; CHECK: @ %bb.0: @ %entry
261 ; CHECK-NEXT: vaddv.u8 r0, q0
264 %xx = zext <16 x i8> %x to <16 x i32>
265 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
; Sign-extends <16 x i8> to <16 x i32>, then reduces with reduce.add.v16i32.
; The assertions expect the extend to fold into a single vaddv.s8 into r0.
269 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x) {
270 ; CHECK-LABEL: add_v16i8_v16i32_sext:
271 ; CHECK: @ %bb.0: @ %entry
272 ; CHECK-NEXT: vaddv.s8 r0, q0
275 %xx = sext <16 x i8> %x to <16 x i32>
276 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
; Zero-extends a <8 x i8> argument to <8 x i32>, then reduces with reduce.add.v8i32.
; The assertions expect vmovlb.u8 to widen to 16-bit lanes, then vaddv.u16 into r0.
280 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x) {
281 ; CHECK-LABEL: add_v8i8_v8i32_zext:
282 ; CHECK: @ %bb.0: @ %entry
283 ; CHECK-NEXT: vmovlb.u8 q0, q0
284 ; CHECK-NEXT: vaddv.u16 r0, q0
287 %xx = zext <8 x i8> %x to <8 x i32>
288 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; Sign-extends a <8 x i8> argument to <8 x i32>, then reduces with reduce.add.v8i32.
; The assertions expect vmovlb.s8 to widen to 16-bit lanes, then vaddv.s16 into r0.
292 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
293 ; CHECK-LABEL: add_v8i8_v8i32_sext:
294 ; CHECK: @ %bb.0: @ %entry
295 ; CHECK-NEXT: vmovlb.s8 q0, q0
296 ; CHECK-NEXT: vaddv.s16 r0, q0
299 %xx = sext <8 x i8> %x to <8 x i32>
300 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; Zero-extends a <4 x i8> argument to <4 x i32>, then reduces with reduce.add.v4i32.
; The assertions expect the extend as a vand with a #0xff splat, then vaddv.u32 into r0.
304 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
305 ; CHECK-LABEL: add_v4i8_v4i32_zext:
306 ; CHECK: @ %bb.0: @ %entry
307 ; CHECK-NEXT: vmov.i32 q1, #0xff
308 ; CHECK-NEXT: vand q0, q0, q1
309 ; CHECK-NEXT: vaddv.u32 r0, q0
312 %xx = zext <4 x i8> %x to <4 x i32>
313 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; Sign-extends a <4 x i8> argument to <4 x i32>, then reduces with reduce.add.v4i32.
; The assertions expect two widening steps (vmovlb.s8, vmovlb.s16), then vaddv.u32 into r0.
317 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x) {
318 ; CHECK-LABEL: add_v4i8_v4i32_sext:
319 ; CHECK: @ %bb.0: @ %entry
320 ; CHECK-NEXT: vmovlb.s8 q0, q0
321 ; CHECK-NEXT: vmovlb.s16 q0, q0
322 ; CHECK-NEXT: vaddv.u32 r0, q0
325 %xx = sext <4 x i8> %x to <4 x i32>
326 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; Zero-extends <16 x i8> to <16 x i16>, then reduces with reduce.add.v16i16 (zeroext return).
; The assertions expect vaddv.u8 followed by uxth on r0.
330 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
331 ; CHECK-LABEL: add_v16i8_v16i16_zext:
332 ; CHECK: @ %bb.0: @ %entry
333 ; CHECK-NEXT: vaddv.u8 r0, q0
334 ; CHECK-NEXT: uxth r0, r0
337 %xx = zext <16 x i8> %x to <16 x i16>
338 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
; Sign-extends <16 x i8> to <16 x i16>, then reduces with reduce.add.v16i16 (signext return).
; The assertions expect vaddv.s8 followed by sxth on r0.
342 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
343 ; CHECK-LABEL: add_v16i8_v16i16_sext:
344 ; CHECK: @ %bb.0: @ %entry
345 ; CHECK-NEXT: vaddv.s8 r0, q0
346 ; CHECK-NEXT: sxth r0, r0
349 %xx = sext <16 x i8> %x to <16 x i16>
350 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
; Zero-extends a <8 x i8> argument to <8 x i16>, then reduces with reduce.add.v8i16 (zeroext return).
; The assertions expect vmovlb.u8, vaddv.u16, then uxth on r0.
354 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) {
355 ; CHECK-LABEL: add_v8i8_v8i16_zext:
356 ; CHECK: @ %bb.0: @ %entry
357 ; CHECK-NEXT: vmovlb.u8 q0, q0
358 ; CHECK-NEXT: vaddv.u16 r0, q0
359 ; CHECK-NEXT: uxth r0, r0
362 %xx = zext <8 x i8> %x to <8 x i16>
363 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
; Sign-extends a <8 x i8> argument to <8 x i16>, then reduces with reduce.add.v8i16 (signext return).
; The assertions expect vmovlb.s8, vaddv.u16 on the widened lanes, then sxth on r0.
367 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
368 ; CHECK-LABEL: add_v8i8_v8i16_sext:
369 ; CHECK: @ %bb.0: @ %entry
370 ; CHECK-NEXT: vmovlb.s8 q0, q0
371 ; CHECK-NEXT: vaddv.u16 r0, q0
372 ; CHECK-NEXT: sxth r0, r0
375 %xx = sext <8 x i8> %x to <8 x i16>
376 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
; Plain i8 add reduction of <16 x i8> %x; the return value is marked zeroext.
; The assertions expect vaddv.u8 followed by uxtb on r0.
380 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) {
381 ; CHECK-LABEL: add_v16i8_v16i8:
382 ; CHECK: @ %bb.0: @ %entry
383 ; CHECK-NEXT: vaddv.u8 r0, q0
384 ; CHECK-NEXT: uxtb r0, r0
387 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
; Zero-extends <16 x i8> to <16 x i64>, then reduces with reduce.add.v16i64.
; No single reduction instruction is expected: lanes are extracted in pairs with
; vmov.u8, masked with #0xff, and accumulated into r0/r1 with add/adds/adc.
391 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
392 ; CHECK-LABEL: add_v16i8_v16i64_zext:
393 ; CHECK: @ %bb.0: @ %entry
394 ; CHECK-NEXT: vmov.u8 r0, q0[1]
395 ; CHECK-NEXT: vmov.u8 r1, q0[0]
396 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
397 ; CHECK-NEXT: vmov.i64 q1, #0xff
398 ; CHECK-NEXT: vand q2, q2, q1
399 ; CHECK-NEXT: vmov.u8 r3, q0[2]
400 ; CHECK-NEXT: vmov r0, s10
401 ; CHECK-NEXT: vmov r1, r2, d4
402 ; CHECK-NEXT: add r0, r1
403 ; CHECK-NEXT: vmov.u8 r1, q0[3]
404 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
405 ; CHECK-NEXT: vmov.u8 r3, q0[4]
406 ; CHECK-NEXT: vand q2, q2, q1
407 ; CHECK-NEXT: vmov r1, s8
408 ; CHECK-NEXT: add r0, r1
409 ; CHECK-NEXT: vmov r1, s10
410 ; CHECK-NEXT: add r0, r1
411 ; CHECK-NEXT: vmov.u8 r1, q0[5]
412 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
413 ; CHECK-NEXT: vand q2, q2, q1
414 ; CHECK-NEXT: vmov r1, s8
415 ; CHECK-NEXT: add r0, r1
416 ; CHECK-NEXT: vmov r1, r3, d5
417 ; CHECK-NEXT: adds r0, r0, r1
418 ; CHECK-NEXT: adc.w r1, r2, r3
419 ; CHECK-NEXT: vmov.u8 r2, q0[7]
420 ; CHECK-NEXT: vmov.u8 r3, q0[6]
421 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
422 ; CHECK-NEXT: vand q2, q2, q1
423 ; CHECK-NEXT: vmov r2, r3, d4
424 ; CHECK-NEXT: adds r0, r0, r2
425 ; CHECK-NEXT: adcs r1, r3
426 ; CHECK-NEXT: vmov r2, r3, d5
427 ; CHECK-NEXT: adds r0, r0, r2
428 ; CHECK-NEXT: vmov.u8 r2, q0[9]
429 ; CHECK-NEXT: adcs r1, r3
430 ; CHECK-NEXT: vmov.u8 r3, q0[8]
431 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
432 ; CHECK-NEXT: vand q2, q2, q1
433 ; CHECK-NEXT: vmov r2, r3, d4
434 ; CHECK-NEXT: adds r0, r0, r2
435 ; CHECK-NEXT: adcs r1, r3
436 ; CHECK-NEXT: vmov r2, r3, d5
437 ; CHECK-NEXT: adds r0, r0, r2
438 ; CHECK-NEXT: vmov.u8 r2, q0[11]
439 ; CHECK-NEXT: adcs r1, r3
440 ; CHECK-NEXT: vmov.u8 r3, q0[10]
441 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
442 ; CHECK-NEXT: vand q2, q2, q1
443 ; CHECK-NEXT: vmov r2, r3, d4
444 ; CHECK-NEXT: adds r0, r0, r2
445 ; CHECK-NEXT: adcs r1, r3
446 ; CHECK-NEXT: vmov r2, r3, d5
447 ; CHECK-NEXT: adds r0, r0, r2
448 ; CHECK-NEXT: vmov.u8 r2, q0[13]
449 ; CHECK-NEXT: adcs r1, r3
450 ; CHECK-NEXT: vmov.u8 r3, q0[12]
451 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
452 ; CHECK-NEXT: vand q2, q2, q1
453 ; CHECK-NEXT: vmov r2, r3, d4
454 ; CHECK-NEXT: adds r0, r0, r2
455 ; CHECK-NEXT: adcs r1, r3
456 ; CHECK-NEXT: vmov r2, r3, d5
457 ; CHECK-NEXT: adds r0, r0, r2
458 ; CHECK-NEXT: vmov.u8 r2, q0[15]
459 ; CHECK-NEXT: adcs r1, r3
460 ; CHECK-NEXT: vmov.u8 r3, q0[14]
461 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
462 ; CHECK-NEXT: vand q0, q0, q1
463 ; CHECK-NEXT: vmov r2, r3, d0
464 ; CHECK-NEXT: adds r0, r0, r2
465 ; CHECK-NEXT: adcs r1, r3
466 ; CHECK-NEXT: vmov r2, r3, d1
467 ; CHECK-NEXT: adds r0, r0, r2
468 ; CHECK-NEXT: adcs r1, r3
471 %xx = zext <16 x i8> %x to <16 x i64>
472 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
; Sign-extends <16 x i8> to <16 x i64>, then reduces with reduce.add.v16i64.
; The expected code is fully scalar: each of the 16 lanes is read with vmov.s8 and
; added into r0/r1 with adds / adc.w (high word taken as "asr #31" of the lane).
476 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
477 ; CHECK-LABEL: add_v16i8_v16i64_sext:
478 ; CHECK: @ %bb.0: @ %entry
479 ; CHECK-NEXT: vmov.s8 r0, q0[0]
480 ; CHECK-NEXT: vmov.s8 r2, q0[1]
481 ; CHECK-NEXT: asrs r1, r0, #31
482 ; CHECK-NEXT: adds r0, r0, r2
483 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
484 ; CHECK-NEXT: vmov.s8 r2, q0[2]
485 ; CHECK-NEXT: adds r0, r0, r2
486 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
487 ; CHECK-NEXT: vmov.s8 r2, q0[3]
488 ; CHECK-NEXT: adds r0, r0, r2
489 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
490 ; CHECK-NEXT: vmov.s8 r2, q0[4]
491 ; CHECK-NEXT: adds r0, r0, r2
492 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
493 ; CHECK-NEXT: vmov.s8 r2, q0[5]
494 ; CHECK-NEXT: adds r0, r0, r2
495 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
496 ; CHECK-NEXT: vmov.s8 r2, q0[6]
497 ; CHECK-NEXT: adds r0, r0, r2
498 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
499 ; CHECK-NEXT: vmov.s8 r2, q0[7]
500 ; CHECK-NEXT: adds r0, r0, r2
501 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
502 ; CHECK-NEXT: vmov.s8 r2, q0[8]
503 ; CHECK-NEXT: adds r0, r0, r2
504 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
505 ; CHECK-NEXT: vmov.s8 r2, q0[9]
506 ; CHECK-NEXT: adds r0, r0, r2
507 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
508 ; CHECK-NEXT: vmov.s8 r2, q0[10]
509 ; CHECK-NEXT: adds r0, r0, r2
510 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
511 ; CHECK-NEXT: vmov.s8 r2, q0[11]
512 ; CHECK-NEXT: adds r0, r0, r2
513 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
514 ; CHECK-NEXT: vmov.s8 r2, q0[12]
515 ; CHECK-NEXT: adds r0, r0, r2
516 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
517 ; CHECK-NEXT: vmov.s8 r2, q0[13]
518 ; CHECK-NEXT: adds r0, r0, r2
519 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
520 ; CHECK-NEXT: vmov.s8 r2, q0[14]
521 ; CHECK-NEXT: adds r0, r0, r2
522 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
523 ; CHECK-NEXT: vmov.s8 r2, q0[15]
524 ; CHECK-NEXT: adds r0, r0, r2
525 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
528 %xx = sext <16 x i8> %x to <16 x i64>
529 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
; Zero-extends a <8 x i8> argument to <8 x i64>, then reduces with reduce.add.v8i64.
; The expected code first widens with vmovlb.u8, then extracts 16-bit lanes in
; pairs, masks them with #0xffff and accumulates into r0/r1 with add/adds/adc.
533 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
534 ; CHECK-LABEL: add_v8i8_v8i64_zext:
535 ; CHECK: @ %bb.0: @ %entry
536 ; CHECK-NEXT: vmovlb.u8 q0, q0
537 ; CHECK-NEXT: vmov.i64 q1, #0xffff
538 ; CHECK-NEXT: vmov.u16 r0, q0[1]
539 ; CHECK-NEXT: vmov.u16 r1, q0[0]
540 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
541 ; CHECK-NEXT: vmov.u16 r3, q0[2]
542 ; CHECK-NEXT: vand q2, q2, q1
543 ; CHECK-NEXT: vmov r0, s10
544 ; CHECK-NEXT: vmov r1, r2, d4
545 ; CHECK-NEXT: add r0, r1
546 ; CHECK-NEXT: vmov.u16 r1, q0[3]
547 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
548 ; CHECK-NEXT: vmov.u16 r3, q0[4]
549 ; CHECK-NEXT: vand q2, q2, q1
550 ; CHECK-NEXT: vmov r1, s8
551 ; CHECK-NEXT: add r0, r1
552 ; CHECK-NEXT: vmov r1, s10
553 ; CHECK-NEXT: add r0, r1
554 ; CHECK-NEXT: vmov.u16 r1, q0[5]
555 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
556 ; CHECK-NEXT: vand q2, q2, q1
557 ; CHECK-NEXT: vmov r1, s8
558 ; CHECK-NEXT: add r0, r1
559 ; CHECK-NEXT: vmov r1, r3, d5
560 ; CHECK-NEXT: adds r0, r0, r1
561 ; CHECK-NEXT: adc.w r1, r2, r3
562 ; CHECK-NEXT: vmov.u16 r2, q0[7]
563 ; CHECK-NEXT: vmov.u16 r3, q0[6]
564 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
565 ; CHECK-NEXT: vand q0, q0, q1
566 ; CHECK-NEXT: vmov r2, r3, d0
567 ; CHECK-NEXT: adds r0, r0, r2
568 ; CHECK-NEXT: adcs r1, r3
569 ; CHECK-NEXT: vmov r2, r3, d1
570 ; CHECK-NEXT: adds r0, r0, r2
571 ; CHECK-NEXT: adcs r1, r3
574 %xx = zext <8 x i8> %x to <8 x i64>
575 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Sign-extends a <8 x i8> argument to <8 x i64>, then reduces with reduce.add.v8i64.
; The expected code is fully scalar: each lane is read with vmov.u16, sign-extended
; with sxtb, and added into r0/r1 with adds / adc.w (high word via "asr #31").
579 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
580 ; CHECK-LABEL: add_v8i8_v8i64_sext:
581 ; CHECK: @ %bb.0: @ %entry
582 ; CHECK-NEXT: vmov.u16 r0, q0[0]
583 ; CHECK-NEXT: vmov.u16 r2, q0[1]
584 ; CHECK-NEXT: sxtb r0, r0
585 ; CHECK-NEXT: sxtb r2, r2
586 ; CHECK-NEXT: asrs r1, r0, #31
587 ; CHECK-NEXT: adds r0, r0, r2
588 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
589 ; CHECK-NEXT: vmov.u16 r2, q0[2]
590 ; CHECK-NEXT: sxtb r2, r2
591 ; CHECK-NEXT: adds r0, r0, r2
592 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
593 ; CHECK-NEXT: vmov.u16 r2, q0[3]
594 ; CHECK-NEXT: sxtb r2, r2
595 ; CHECK-NEXT: adds r0, r0, r2
596 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
597 ; CHECK-NEXT: vmov.u16 r2, q0[4]
598 ; CHECK-NEXT: sxtb r2, r2
599 ; CHECK-NEXT: adds r0, r0, r2
600 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
601 ; CHECK-NEXT: vmov.u16 r2, q0[5]
602 ; CHECK-NEXT: sxtb r2, r2
603 ; CHECK-NEXT: adds r0, r0, r2
604 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
605 ; CHECK-NEXT: vmov.u16 r2, q0[6]
606 ; CHECK-NEXT: sxtb r2, r2
607 ; CHECK-NEXT: adds r0, r0, r2
608 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
609 ; CHECK-NEXT: vmov.u16 r2, q0[7]
610 ; CHECK-NEXT: sxtb r2, r2
611 ; CHECK-NEXT: adds r0, r0, r2
612 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
615 %xx = sext <8 x i8> %x to <8 x i64>
616 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Zero-extends a <4 x i8> argument to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect a vand with a #0xff splat, then vaddlv.u32 into r0/r1.
620 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x) {
621 ; CHECK-LABEL: add_v4i8_v4i64_zext:
622 ; CHECK: @ %bb.0: @ %entry
623 ; CHECK-NEXT: vmov.i32 q1, #0xff
624 ; CHECK-NEXT: vand q0, q0, q1
625 ; CHECK-NEXT: vaddlv.u32 r0, r1, q0
628 %xx = zext <4 x i8> %x to <4 x i64>
629 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Sign-extends a <4 x i8> argument to <4 x i64>, then reduces with reduce.add.v4i64.
; The assertions expect vmovlb.s8 + vmovlb.s16 widening, then vaddlv.s32 into r0/r1.
633 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x) {
634 ; CHECK-LABEL: add_v4i8_v4i64_sext:
635 ; CHECK: @ %bb.0: @ %entry
636 ; CHECK-NEXT: vmovlb.s8 q0, q0
637 ; CHECK-NEXT: vmovlb.s16 q0, q0
638 ; CHECK-NEXT: vaddlv.s32 r0, r1, q0
641 %xx = sext <4 x i8> %x to <4 x i64>
642 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Zero-extends <2 x i8> to <2 x i64>, then reduces with reduce.add.v2i64.
; The expected code masks each i64 lane with #0xff, moves the values to core
; registers and uses a single 32-bit add for the low word (high word comes from d0).
646 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
647 ; CHECK-LABEL: add_v2i8_v2i64_zext:
648 ; CHECK: @ %bb.0: @ %entry
649 ; CHECK-NEXT: vmov.i64 q1, #0xff
650 ; CHECK-NEXT: vand q0, q0, q1
651 ; CHECK-NEXT: vmov r0, s2
652 ; CHECK-NEXT: vmov r2, r1, d0
653 ; CHECK-NEXT: add r0, r2
656 %xx = zext <2 x i8> %x to <2 x i64>
657 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Sign-extends <2 x i8> to <2 x i64>, then reduces with reduce.add.v2i64.
; The expected code is scalar: s0/s2 are moved to r0/r2, sign-extended with
; sxtb, and added as a 64-bit value using "asr #31" for the high words.
661 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x) {
662 ; CHECK-LABEL: add_v2i8_v2i64_sext:
663 ; CHECK: @ %bb.0: @ %entry
664 ; CHECK-NEXT: vmov r0, s0
665 ; CHECK-NEXT: vmov r2, s2
666 ; CHECK-NEXT: sxtb r0, r0
667 ; CHECK-NEXT: asrs r1, r0, #31
668 ; CHECK-NEXT: sxtb r2, r2
669 ; CHECK-NEXT: adds r0, r0, r2
670 ; CHECK-NEXT: adc.w r1, r1, r2, asr #31
673 %xx = sext <2 x i8> %x to <2 x i64>
674 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Plain i64 add reduction of <2 x i64> %x via llvm.vector.reduce.add.v2i64.
; The expected code moves d1 and d0 to core register pairs and adds them with adds/adcs.
678 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x) {
679 ; CHECK-LABEL: add_v2i64_v2i64:
680 ; CHECK: @ %bb.0: @ %entry
681 ; CHECK-NEXT: vmov r0, r1, d1
682 ; CHECK-NEXT: vmov r2, r3, d0
683 ; CHECK-NEXT: adds r0, r0, r2
684 ; CHECK-NEXT: adcs r1, r3
687 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
; "_acc" variant of the <4 x i32> reduction: takes an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.u32 on r0.
691 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, i32 %a) {
692 ; CHECK-LABEL: add_v4i32_v4i32_acc:
693 ; CHECK: @ %bb.0: @ %entry
694 ; CHECK-NEXT: vaddva.u32 r0, q0
697 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
; "_acc" variant: zext <4 x i32> to <4 x i64>, reduce.add.v4i64, with an extra i64 argument %a
; (its use is not visible in these lines). The assertions expect vaddlva.u32 on r0/r1.
702 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) {
703 ; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
704 ; CHECK: @ %bb.0: @ %entry
705 ; CHECK-NEXT: vaddlva.u32 r0, r1, q0
708 %xx = zext <4 x i32> %x to <4 x i64>
709 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; "_acc" variant: sext <4 x i32> to <4 x i64>, reduce.add.v4i64, with an extra i64 argument %a
; (its use is not visible in these lines). The assertions expect vaddlva.s32 on r0/r1.
714 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
715 ; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
716 ; CHECK: @ %bb.0: @ %entry
717 ; CHECK-NEXT: vaddlva.s32 r0, r1, q0
720 %xx = sext <4 x i32> %x to <4 x i64>
721 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; "_acc" variant: zext <2 x i32> to <2 x i64>, reduce.add.v2i64, with an extra i64 argument %a.
; The expected code is scalarized and needs lr/r12 as scratch (hence the push/pop of
; {r7, lr}); it ends with adds/adcs into r0/r1.
726 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) {
727 ; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
728 ; CHECK: @ %bb.0: @ %entry
729 ; CHECK-NEXT: .save {r7, lr}
730 ; CHECK-NEXT: push {r7, lr}
731 ; CHECK-NEXT: vmov.i64 q1, #0xffffffff
732 ; CHECK-NEXT: vand q0, q0, q1
733 ; CHECK-NEXT: vmov lr, r12, d1
734 ; CHECK-NEXT: vmov r3, r2, d0
735 ; CHECK-NEXT: adds.w r3, r3, lr
736 ; CHECK-NEXT: adc.w r2, r2, r12
737 ; CHECK-NEXT: adds r0, r0, r3
738 ; CHECK-NEXT: adcs r1, r2
739 ; CHECK-NEXT: pop {r7, pc}
741 %xx = zext <2 x i32> %x to <2 x i64>
742 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; "_acc" variant: sext <2 x i32> to <2 x i64>, reduce.add.v2i64, with an extra i64 argument %a.
; The expected code sums s0 and s2 as a 64-bit value in r2/r3 (high words via
; "asr #31") and ends with adds/adcs into r0/r1.
747 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) {
748 ; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
749 ; CHECK: @ %bb.0: @ %entry
750 ; CHECK-NEXT: vmov r2, s0
751 ; CHECK-NEXT: vmov r3, s2
752 ; CHECK-NEXT: asr.w r12, r2, #31
753 ; CHECK-NEXT: adds r2, r2, r3
754 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
755 ; CHECK-NEXT: adds r0, r0, r2
756 ; CHECK-NEXT: adcs r1, r3
759 %xx = sext <2 x i32> %x to <2 x i64>
760 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; "_acc" variant: zext <8 x i16> to <8 x i32>, reduce.add.v8i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.u16 on r0.
765 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) {
766 ; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
767 ; CHECK: @ %bb.0: @ %entry
768 ; CHECK-NEXT: vaddva.u16 r0, q0
771 %xx = zext <8 x i16> %x to <8 x i32>
772 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; "_acc" variant: sext <8 x i16> to <8 x i32>, reduce.add.v8i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.s16 on r0.
777 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
778 ; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
779 ; CHECK: @ %bb.0: @ %entry
780 ; CHECK-NEXT: vaddva.s16 r0, q0
783 %xx = sext <8 x i16> %x to <8 x i32>
784 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; "_acc" variant: zext <4 x i16> to <4 x i32>, reduce.add.v4i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.u16 then vaddva.u32 on r0.
789 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, i32 %a) {
790 ; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
791 ; CHECK: @ %bb.0: @ %entry
792 ; CHECK-NEXT: vmovlb.u16 q0, q0
793 ; CHECK-NEXT: vaddva.u32 r0, q0
796 %xx = zext <4 x i16> %x to <4 x i32>
797 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; "_acc" variant: sext <4 x i16> to <4 x i32>, reduce.add.v4i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.s16 then vaddva.u32 on r0.
802 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) {
803 ; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
804 ; CHECK: @ %bb.0: @ %entry
805 ; CHECK-NEXT: vmovlb.s16 q0, q0
806 ; CHECK-NEXT: vaddva.u32 r0, q0
809 %xx = sext <4 x i16> %x to <4 x i32>
810 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; "_acc" variant of the <8 x i16> reduction (zeroext i16 return) with an extra i16 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.u16 then uxth on r0.
815 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) {
816 ; CHECK-LABEL: add_v8i16_v8i16_acc:
817 ; CHECK: @ %bb.0: @ %entry
818 ; CHECK-NEXT: vaddva.u16 r0, q0
819 ; CHECK-NEXT: uxth r0, r0
822 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
; "_acc" variant: zext <8 x i16> to <8 x i64>, reduce.add.v8i64, with an extra i64 argument %a.
; The expected code is scalarized (vmov.u16 lane pairs masked with #0xffff), keeps the
; running sum in lr/r12 (hence the push/pop of {r7, lr}) and ends with adds/adcs into r0/r1.
827 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
828 ; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
829 ; CHECK: @ %bb.0: @ %entry
830 ; CHECK-NEXT: .save {r7, lr}
831 ; CHECK-NEXT: push {r7, lr}
832 ; CHECK-NEXT: vmov.u16 r2, q0[1]
833 ; CHECK-NEXT: vmov.u16 r3, q0[0]
834 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
835 ; CHECK-NEXT: vmov.i64 q1, #0xffff
836 ; CHECK-NEXT: vand q2, q2, q1
837 ; CHECK-NEXT: vmov r2, s10
838 ; CHECK-NEXT: vmov r3, r12, d4
839 ; CHECK-NEXT: add.w lr, r3, r2
840 ; CHECK-NEXT: vmov.u16 r3, q0[3]
841 ; CHECK-NEXT: vmov.u16 r2, q0[2]
842 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
843 ; CHECK-NEXT: vand q2, q2, q1
844 ; CHECK-NEXT: vmov r2, s8
845 ; CHECK-NEXT: vmov r3, s10
846 ; CHECK-NEXT: add r2, lr
847 ; CHECK-NEXT: add.w lr, r2, r3
848 ; CHECK-NEXT: vmov.u16 r3, q0[5]
849 ; CHECK-NEXT: vmov.u16 r2, q0[4]
850 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
851 ; CHECK-NEXT: vand q2, q2, q1
852 ; CHECK-NEXT: vmov r2, s8
853 ; CHECK-NEXT: add lr, r2
854 ; CHECK-NEXT: vmov r3, r2, d5
855 ; CHECK-NEXT: adds.w lr, lr, r3
856 ; CHECK-NEXT: vmov.u16 r3, q0[6]
857 ; CHECK-NEXT: adc.w r12, r12, r2
858 ; CHECK-NEXT: vmov.u16 r2, q0[7]
859 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
860 ; CHECK-NEXT: vand q0, q0, q1
861 ; CHECK-NEXT: vmov r2, r3, d0
862 ; CHECK-NEXT: adds.w lr, lr, r2
863 ; CHECK-NEXT: adc.w r12, r12, r3
864 ; CHECK-NEXT: vmov r2, r3, d1
865 ; CHECK-NEXT: adds.w r2, r2, lr
866 ; CHECK-NEXT: adc.w r3, r3, r12
867 ; CHECK-NEXT: adds r0, r0, r2
868 ; CHECK-NEXT: adcs r1, r3
869 ; CHECK-NEXT: pop {r7, pc}
871 %xx = zext <8 x i16> %x to <8 x i64>
872 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; "_acc" variant: sext <8 x i16> to <8 x i64>, reduce.add.v8i64, with an extra i64 argument %a.
; The expected code is fully scalar (vmov.s16 per lane, adds/adc.w with "asr #31"), uses
; lr/r12 as scratch (hence the push/pop of {r7, lr}) and ends with adds/adcs into r0/r1.
877 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
878 ; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
879 ; CHECK: @ %bb.0: @ %entry
880 ; CHECK-NEXT: .save {r7, lr}
881 ; CHECK-NEXT: push {r7, lr}
882 ; CHECK-NEXT: vmov.s16 r2, q0[0]
883 ; CHECK-NEXT: vmov.s16 r3, q0[1]
884 ; CHECK-NEXT: asr.w r12, r2, #31
885 ; CHECK-NEXT: adds.w lr, r2, r3
886 ; CHECK-NEXT: vmov.s16 r2, q0[2]
887 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
888 ; CHECK-NEXT: adds.w r12, lr, r2
889 ; CHECK-NEXT: adc.w r2, r3, r2, asr #31
890 ; CHECK-NEXT: vmov.s16 r3, q0[3]
891 ; CHECK-NEXT: adds.w r12, r12, r3
892 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
893 ; CHECK-NEXT: vmov.s16 r3, q0[4]
894 ; CHECK-NEXT: adds.w r12, r12, r3
895 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
896 ; CHECK-NEXT: vmov.s16 r3, q0[5]
897 ; CHECK-NEXT: adds.w r12, r12, r3
898 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
899 ; CHECK-NEXT: vmov.s16 r3, q0[6]
900 ; CHECK-NEXT: adds.w r12, r12, r3
901 ; CHECK-NEXT: adc.w lr, r2, r3, asr #31
902 ; CHECK-NEXT: vmov.s16 r3, q0[7]
903 ; CHECK-NEXT: adds.w r2, r12, r3
904 ; CHECK-NEXT: adc.w r3, lr, r3, asr #31
905 ; CHECK-NEXT: adds r0, r0, r2
906 ; CHECK-NEXT: adcs r1, r3
907 ; CHECK-NEXT: pop {r7, pc}
909 %xx = sext <8 x i16> %x to <8 x i64>
910 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; "_acc" variant: zext <4 x i16> to <4 x i64>, reduce.add.v4i64, with an extra i64 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.u16 then vaddlva.u32 on r0/r1.
915 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) {
916 ; CHECK-LABEL: add_v4i16_v4i64_acc_zext:
917 ; CHECK: @ %bb.0: @ %entry
918 ; CHECK-NEXT: vmovlb.u16 q0, q0
919 ; CHECK-NEXT: vaddlva.u32 r0, r1, q0
922 %xx = zext <4 x i16> %x to <4 x i64>
923 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; "_acc" variant: sext <4 x i16> to <4 x i64>, reduce.add.v4i64, with an extra i64 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.s16 then vaddlva.s32 on r0/r1.
928 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
929 ; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
930 ; CHECK: @ %bb.0: @ %entry
931 ; CHECK-NEXT: vmovlb.s16 q0, q0
932 ; CHECK-NEXT: vaddlva.s32 r0, r1, q0
935 %xx = sext <4 x i16> %x to <4 x i64>
936 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; "_acc" variant: zext <2 x i16> to <2 x i64>, reduce.add.v2i64, with an extra i64 argument %a.
; The expected code masks with #0xffff, sums the low words in r2 with a 32-bit add,
; and ends with adds / adc.w into r0/r1.
941 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
942 ; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
943 ; CHECK: @ %bb.0: @ %entry
944 ; CHECK-NEXT: vmov.i64 q1, #0xffff
945 ; CHECK-NEXT: vand q0, q0, q1
946 ; CHECK-NEXT: vmov r2, s2
947 ; CHECK-NEXT: vmov r3, r12, d0
948 ; CHECK-NEXT: add r2, r3
949 ; CHECK-NEXT: adds r0, r0, r2
950 ; CHECK-NEXT: adc.w r1, r1, r12
953 %xx = zext <2 x i16> %x to <2 x i64>
954 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; "_acc" variant: sext <2 x i16> to <2 x i64>, reduce.add.v2i64, with an extra i64 argument %a.
; The expected code sign-extends s0/s2 with sxth, sums them as a 64-bit value in
; r2/r3 (high words via "asr #31") and ends with adds/adcs into r0/r1.
959 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) {
960 ; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
961 ; CHECK: @ %bb.0: @ %entry
962 ; CHECK-NEXT: vmov r2, s0
963 ; CHECK-NEXT: vmov r3, s2
964 ; CHECK-NEXT: sxth r2, r2
965 ; CHECK-NEXT: asr.w r12, r2, #31
966 ; CHECK-NEXT: sxth r3, r3
967 ; CHECK-NEXT: adds r2, r2, r3
968 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
969 ; CHECK-NEXT: adds r0, r0, r2
970 ; CHECK-NEXT: adcs r1, r3
973 %xx = sext <2 x i16> %x to <2 x i64>
974 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; "_acc" variant: zext <16 x i8> to <16 x i32>, reduce.add.v16i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.u8 on r0.
979 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) {
980 ; CHECK-LABEL: add_v16i8_v16i32_acc_zext:
981 ; CHECK: @ %bb.0: @ %entry
982 ; CHECK-NEXT: vaddva.u8 r0, q0
985 %xx = zext <16 x i8> %x to <16 x i32>
986 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
; "_acc" variant: sext <16 x i8> to <16 x i32>, reduce.add.v16i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.s8 on r0.
991 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) {
992 ; CHECK-LABEL: add_v16i8_v16i32_acc_sext:
993 ; CHECK: @ %bb.0: @ %entry
994 ; CHECK-NEXT: vaddva.s8 r0, q0
997 %xx = sext <16 x i8> %x to <16 x i32>
998 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
; "_acc" variant: zext <8 x i8> to <8 x i32>, reduce.add.v8i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.u8 then vaddva.u16 on r0.
1003 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) {
1004 ; CHECK-LABEL: add_v8i8_v8i32_acc_zext:
1005 ; CHECK: @ %bb.0: @ %entry
1006 ; CHECK-NEXT: vmovlb.u8 q0, q0
1007 ; CHECK-NEXT: vaddva.u16 r0, q0
1010 %xx = zext <8 x i8> %x to <8 x i32>
1011 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; "_acc" variant: sext <8 x i8> to <8 x i32>, reduce.add.v8i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.s8 then vaddva.s16 on r0.
1016 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
1017 ; CHECK-LABEL: add_v8i8_v8i32_acc_sext:
1018 ; CHECK: @ %bb.0: @ %entry
1019 ; CHECK-NEXT: vmovlb.s8 q0, q0
1020 ; CHECK-NEXT: vaddva.s16 r0, q0
1023 %xx = sext <8 x i8> %x to <8 x i32>
1024 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
; "_acc" variant: zext <4 x i8> to <4 x i32>, reduce.add.v4i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect a #0xff mask then vaddva.u32 on r0.
1029 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
1030 ; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
1031 ; CHECK: @ %bb.0: @ %entry
1032 ; CHECK-NEXT: vmov.i32 q1, #0xff
1033 ; CHECK-NEXT: vand q0, q0, q1
1034 ; CHECK-NEXT: vaddva.u32 r0, q0
1037 %xx = zext <4 x i8> %x to <4 x i32>
1038 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; "_acc" variant: sext <4 x i8> to <4 x i32>, reduce.add.v4i32, with an extra i32 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.s8 + vmovlb.s16 then vaddva.u32 on r0.
1043 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, i32 %a) {
1044 ; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
1045 ; CHECK: @ %bb.0: @ %entry
1046 ; CHECK-NEXT: vmovlb.s8 q0, q0
1047 ; CHECK-NEXT: vmovlb.s16 q0, q0
1048 ; CHECK-NEXT: vaddva.u32 r0, q0
1051 %xx = sext <4 x i8> %x to <4 x i32>
1052 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
; "_acc" variant: zext <16 x i8> to <16 x i16>, reduce.add.v16i16 (zeroext return), extra i16 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.u8 then uxth on r0.
1057 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
1058 ; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
1059 ; CHECK: @ %bb.0: @ %entry
1060 ; CHECK-NEXT: vaddva.u8 r0, q0
1061 ; CHECK-NEXT: uxth r0, r0
1064 %xx = zext <16 x i8> %x to <16 x i16>
1065 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
; "_acc" variant: sext <16 x i8> to <16 x i16>, reduce.add.v16i16 (signext return), extra i16 argument %a
; (its use is not visible in these lines). The assertions expect vaddva.s8 then sxth on r0.
1070 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
1071 ; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
1072 ; CHECK: @ %bb.0: @ %entry
1073 ; CHECK-NEXT: vaddva.s8 r0, q0
1074 ; CHECK-NEXT: sxth r0, r0
1077 %xx = sext <16 x i8> %x to <16 x i16>
1078 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
; "_acc" variant: zext <8 x i8> to <8 x i16>, reduce.add.v8i16 (zeroext return), extra i16 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.u8, vaddva.u16, then uxth on r0.
1083 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
1084 ; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
1085 ; CHECK: @ %bb.0: @ %entry
1086 ; CHECK-NEXT: vmovlb.u8 q0, q0
1087 ; CHECK-NEXT: vaddva.u16 r0, q0
1088 ; CHECK-NEXT: uxth r0, r0
1091 %xx = zext <8 x i8> %x to <8 x i16>
1092 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
; "_acc" variant: sext <8 x i8> to <8 x i16>, reduce.add.v8i16 (signext return), extra i16 argument %a
; (its use is not visible in these lines). The assertions expect vmovlb.s8, vaddva.u16, then sxth on r0.
1097 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
1098 ; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
1099 ; CHECK: @ %bb.0: @ %entry
1100 ; CHECK-NEXT: vmovlb.s8 q0, q0
1101 ; CHECK-NEXT: vaddva.u16 r0, q0
1102 ; CHECK-NEXT: sxth r0, r0
1105 %xx = sext <8 x i8> %x to <8 x i16>
1106 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
; Add-reduction of a <16 x i8> vector with no extension, in a function that
; takes a scalar i8 %a and returns a zeroext i8.
; The expected assembly sums the byte lanes with vaddva.u8 (scalar operand r0)
; and then uses uxtb to zero-extend the low 8 bits of r0, matching the zeroext
; return attribute.
1111 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) {
1112 ; CHECK-LABEL: add_v16i8_v16i8_acc:
1113 ; CHECK: @ %bb.0: @ %entry
1114 ; CHECK-NEXT: vaddva.u8 r0, q0
1115 ; CHECK-NEXT: uxtb r0, r0
1118 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
; Add-reduction of a <16 x i8> vector zero-extended to <16 x i64>, in a
; function that also takes a scalar i64 %a.
; The expected assembly contains no vector reduction instruction. Instead it:
;   - extracts the byte lanes two at a time with vmov.u8 and packs each pair
;     into a vector register,
;   - masks the packed pair with a 64-bit-lane 0xff constant held in q1,
;   - moves the results to general-purpose registers and sums them, keeping
;     the running 64-bit total in lr (low word) and r12 (high word) via
;     adds/adc pairs,
;   - finally adds the total into the r0:r1 pair.
; lr is used as a scratch register here, and the sequence is bracketed by
; push {r7, lr} / pop {r7, pc}.
1123 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
1124 ; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
1125 ; CHECK: @ %bb.0: @ %entry
1126 ; CHECK-NEXT: .save {r7, lr}
1127 ; CHECK-NEXT: push {r7, lr}
1128 ; CHECK-NEXT: vmov.u8 r2, q0[1]
1129 ; CHECK-NEXT: vmov.u8 r3, q0[0]
1130 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1131 ; CHECK-NEXT: vmov.i64 q1, #0xff
1132 ; CHECK-NEXT: vand q2, q2, q1
1133 ; CHECK-NEXT: vmov r2, s10
1134 ; CHECK-NEXT: vmov r3, r12, d4
1135 ; CHECK-NEXT: add.w lr, r3, r2
1136 ; CHECK-NEXT: vmov.u8 r3, q0[3]
1137 ; CHECK-NEXT: vmov.u8 r2, q0[2]
1138 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1139 ; CHECK-NEXT: vand q2, q2, q1
1140 ; CHECK-NEXT: vmov r2, s8
1141 ; CHECK-NEXT: vmov r3, s10
1142 ; CHECK-NEXT: add r2, lr
1143 ; CHECK-NEXT: add.w lr, r2, r3
1144 ; CHECK-NEXT: vmov.u8 r3, q0[5]
1145 ; CHECK-NEXT: vmov.u8 r2, q0[4]
1146 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1147 ; CHECK-NEXT: vand q2, q2, q1
1148 ; CHECK-NEXT: vmov r2, s8
1149 ; CHECK-NEXT: add lr, r2
1150 ; CHECK-NEXT: vmov r3, r2, d5
1151 ; CHECK-NEXT: adds.w lr, lr, r3
1152 ; CHECK-NEXT: vmov.u8 r3, q0[6]
1153 ; CHECK-NEXT: adc.w r12, r12, r2
1154 ; CHECK-NEXT: vmov.u8 r2, q0[7]
1155 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1156 ; CHECK-NEXT: vand q2, q2, q1
1157 ; CHECK-NEXT: vmov r2, r3, d4
1158 ; CHECK-NEXT: adds.w lr, lr, r2
1159 ; CHECK-NEXT: adc.w r12, r12, r3
1160 ; CHECK-NEXT: vmov r2, r3, d5
1161 ; CHECK-NEXT: adds.w lr, lr, r2
1162 ; CHECK-NEXT: vmov.u8 r2, q0[9]
1163 ; CHECK-NEXT: adc.w r12, r12, r3
1164 ; CHECK-NEXT: vmov.u8 r3, q0[8]
1165 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1166 ; CHECK-NEXT: vand q2, q2, q1
1167 ; CHECK-NEXT: vmov r2, r3, d4
1168 ; CHECK-NEXT: adds.w lr, lr, r2
1169 ; CHECK-NEXT: adc.w r12, r12, r3
1170 ; CHECK-NEXT: vmov r2, r3, d5
1171 ; CHECK-NEXT: adds.w lr, lr, r2
1172 ; CHECK-NEXT: vmov.u8 r2, q0[11]
1173 ; CHECK-NEXT: adc.w r12, r12, r3
1174 ; CHECK-NEXT: vmov.u8 r3, q0[10]
1175 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1176 ; CHECK-NEXT: vand q2, q2, q1
1177 ; CHECK-NEXT: vmov r2, r3, d4
1178 ; CHECK-NEXT: adds.w lr, lr, r2
1179 ; CHECK-NEXT: adc.w r12, r12, r3
1180 ; CHECK-NEXT: vmov r2, r3, d5
1181 ; CHECK-NEXT: adds.w lr, lr, r2
1182 ; CHECK-NEXT: vmov.u8 r2, q0[13]
1183 ; CHECK-NEXT: adc.w r12, r12, r3
1184 ; CHECK-NEXT: vmov.u8 r3, q0[12]
1185 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1186 ; CHECK-NEXT: vand q2, q2, q1
1187 ; CHECK-NEXT: vmov r2, r3, d4
1188 ; CHECK-NEXT: adds.w lr, lr, r2
1189 ; CHECK-NEXT: adc.w r12, r12, r3
1190 ; CHECK-NEXT: vmov r2, r3, d5
1191 ; CHECK-NEXT: adds.w lr, lr, r2
1192 ; CHECK-NEXT: vmov.u8 r2, q0[15]
1193 ; CHECK-NEXT: adc.w r12, r12, r3
1194 ; CHECK-NEXT: vmov.u8 r3, q0[14]
1195 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1196 ; CHECK-NEXT: vand q0, q0, q1
1197 ; CHECK-NEXT: vmov r2, r3, d0
1198 ; CHECK-NEXT: adds.w lr, lr, r2
1199 ; CHECK-NEXT: adc.w r12, r12, r3
1200 ; CHECK-NEXT: vmov r2, r3, d1
1201 ; CHECK-NEXT: adds.w r2, r2, lr
1202 ; CHECK-NEXT: adc.w r3, r3, r12
1203 ; CHECK-NEXT: adds r0, r0, r2
1204 ; CHECK-NEXT: adcs r1, r3
1205 ; CHECK-NEXT: pop {r7, pc}
1207 %xx = zext <16 x i8> %x to <16 x i64>
1208 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
; Add-reduction of a <16 x i8> vector sign-extended to <16 x i64>, in a
; function that also takes a scalar i64 %a.
; The expected assembly is fully scalar: each byte lane is extracted with the
; sign-extending vmov.s8, and its 64-bit high word is formed with an
; arithmetic shift right by 31 (either a separate asr or the shifted operand
; of adc). Lanes are summed one at a time with adds/adc pairs, and the total
; is finally added into the r0:r1 pair.
; lr is used as a scratch register here, and the sequence is bracketed by
; push {r7, lr} / pop {r7, pc}.
1213 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
1214 ; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
1215 ; CHECK: @ %bb.0: @ %entry
1216 ; CHECK-NEXT: .save {r7, lr}
1217 ; CHECK-NEXT: push {r7, lr}
1218 ; CHECK-NEXT: vmov.s8 r2, q0[0]
1219 ; CHECK-NEXT: vmov.s8 r3, q0[1]
1220 ; CHECK-NEXT: asr.w r12, r2, #31
1221 ; CHECK-NEXT: adds.w lr, r2, r3
1222 ; CHECK-NEXT: vmov.s8 r2, q0[2]
1223 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
1224 ; CHECK-NEXT: adds.w r12, lr, r2
1225 ; CHECK-NEXT: adc.w r2, r3, r2, asr #31
1226 ; CHECK-NEXT: vmov.s8 r3, q0[3]
1227 ; CHECK-NEXT: adds.w r12, r12, r3
1228 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1229 ; CHECK-NEXT: vmov.s8 r3, q0[4]
1230 ; CHECK-NEXT: adds.w r12, r12, r3
1231 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1232 ; CHECK-NEXT: vmov.s8 r3, q0[5]
1233 ; CHECK-NEXT: adds.w r12, r12, r3
1234 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1235 ; CHECK-NEXT: vmov.s8 r3, q0[6]
1236 ; CHECK-NEXT: adds.w r12, r12, r3
1237 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1238 ; CHECK-NEXT: vmov.s8 r3, q0[7]
1239 ; CHECK-NEXT: adds.w r12, r12, r3
1240 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1241 ; CHECK-NEXT: vmov.s8 r3, q0[8]
1242 ; CHECK-NEXT: adds.w r12, r12, r3
1243 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1244 ; CHECK-NEXT: vmov.s8 r3, q0[9]
1245 ; CHECK-NEXT: adds.w r12, r12, r3
1246 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1247 ; CHECK-NEXT: vmov.s8 r3, q0[10]
1248 ; CHECK-NEXT: adds.w r12, r12, r3
1249 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1250 ; CHECK-NEXT: vmov.s8 r3, q0[11]
1251 ; CHECK-NEXT: adds.w r12, r12, r3
1252 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1253 ; CHECK-NEXT: vmov.s8 r3, q0[12]
1254 ; CHECK-NEXT: adds.w r12, r12, r3
1255 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1256 ; CHECK-NEXT: vmov.s8 r3, q0[13]
1257 ; CHECK-NEXT: adds.w r12, r12, r3
1258 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1259 ; CHECK-NEXT: vmov.s8 r3, q0[14]
1260 ; CHECK-NEXT: adds.w r12, r12, r3
1261 ; CHECK-NEXT: adc.w lr, r2, r3, asr #31
1262 ; CHECK-NEXT: vmov.s8 r3, q0[15]
1263 ; CHECK-NEXT: adds.w r2, r12, r3
1264 ; CHECK-NEXT: adc.w r3, lr, r3, asr #31
1265 ; CHECK-NEXT: adds r0, r0, r2
1266 ; CHECK-NEXT: adcs r1, r3
1267 ; CHECK-NEXT: pop {r7, pc}
1269 %xx = sext <16 x i8> %x to <16 x i64>
1270 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
; Add-reduction of a <8 x i8> vector zero-extended to <8 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly first widens the lanes with vmovlb.u8, then extracts
; them two at a time with vmov.u16, packs each pair into a vector register and
; masks it with a 64-bit-lane 0xffff constant held in q1. The masked values
; are moved to general-purpose registers and summed, with the running 64-bit
; total in lr (low word) and r12 (high word), and the total is finally added
; into the r0:r1 pair.
; lr is used as a scratch register here, and the sequence is bracketed by
; push {r7, lr} / pop {r7, pc}.
1275 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
1276 ; CHECK-LABEL: add_v8i8_v8i64_acc_zext:
1277 ; CHECK: @ %bb.0: @ %entry
1278 ; CHECK-NEXT: .save {r7, lr}
1279 ; CHECK-NEXT: push {r7, lr}
1280 ; CHECK-NEXT: vmovlb.u8 q0, q0
1281 ; CHECK-NEXT: vmov.i64 q1, #0xffff
1282 ; CHECK-NEXT: vmov.u16 r2, q0[1]
1283 ; CHECK-NEXT: vmov.u16 r3, q0[0]
1284 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1285 ; CHECK-NEXT: vand q2, q2, q1
1286 ; CHECK-NEXT: vmov r2, s10
1287 ; CHECK-NEXT: vmov r3, r12, d4
1288 ; CHECK-NEXT: add.w lr, r3, r2
1289 ; CHECK-NEXT: vmov.u16 r3, q0[3]
1290 ; CHECK-NEXT: vmov.u16 r2, q0[2]
1291 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1292 ; CHECK-NEXT: vand q2, q2, q1
1293 ; CHECK-NEXT: vmov r2, s8
1294 ; CHECK-NEXT: vmov r3, s10
1295 ; CHECK-NEXT: add r2, lr
1296 ; CHECK-NEXT: add.w lr, r2, r3
1297 ; CHECK-NEXT: vmov.u16 r3, q0[5]
1298 ; CHECK-NEXT: vmov.u16 r2, q0[4]
1299 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1300 ; CHECK-NEXT: vand q2, q2, q1
1301 ; CHECK-NEXT: vmov r2, s8
1302 ; CHECK-NEXT: add lr, r2
1303 ; CHECK-NEXT: vmov r3, r2, d5
1304 ; CHECK-NEXT: adds.w lr, lr, r3
1305 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1306 ; CHECK-NEXT: adc.w r12, r12, r2
1307 ; CHECK-NEXT: vmov.u16 r2, q0[7]
1308 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1309 ; CHECK-NEXT: vand q0, q0, q1
1310 ; CHECK-NEXT: vmov r2, r3, d0
1311 ; CHECK-NEXT: adds.w lr, lr, r2
1312 ; CHECK-NEXT: adc.w r12, r12, r3
1313 ; CHECK-NEXT: vmov r2, r3, d1
1314 ; CHECK-NEXT: adds.w r2, r2, lr
1315 ; CHECK-NEXT: adc.w r3, r3, r12
1316 ; CHECK-NEXT: adds r0, r0, r2
1317 ; CHECK-NEXT: adcs r1, r3
1318 ; CHECK-NEXT: pop {r7, pc}
1320 %xx = zext <8 x i8> %x to <8 x i64>
1321 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Add-reduction of a <8 x i8> vector sign-extended to <8 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly is fully scalar: each lane is extracted with vmov.u16
; and sign-extended from its low byte with sxtb, and its 64-bit high word is
; formed with an arithmetic shift right by 31. Lanes are summed one at a time
; with adds/adc pairs, and the total is finally added into the r0:r1 pair.
; lr is used as a scratch register here, and the sequence is bracketed by
; push {r7, lr} / pop {r7, pc}.
1326 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
1327 ; CHECK-LABEL: add_v8i8_v8i64_acc_sext:
1328 ; CHECK: @ %bb.0: @ %entry
1329 ; CHECK-NEXT: .save {r7, lr}
1330 ; CHECK-NEXT: push {r7, lr}
1331 ; CHECK-NEXT: vmov.u16 r2, q0[0]
1332 ; CHECK-NEXT: vmov.u16 r3, q0[1]
1333 ; CHECK-NEXT: sxtb r2, r2
1334 ; CHECK-NEXT: sxtb r3, r3
1335 ; CHECK-NEXT: asr.w r12, r2, #31
1336 ; CHECK-NEXT: adds.w lr, r2, r3
1337 ; CHECK-NEXT: vmov.u16 r2, q0[2]
1338 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
1339 ; CHECK-NEXT: sxtb r2, r2
1340 ; CHECK-NEXT: adds.w r12, lr, r2
1341 ; CHECK-NEXT: adc.w r2, r3, r2, asr #31
1342 ; CHECK-NEXT: vmov.u16 r3, q0[3]
1343 ; CHECK-NEXT: sxtb r3, r3
1344 ; CHECK-NEXT: adds.w r12, r12, r3
1345 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1346 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1347 ; CHECK-NEXT: sxtb r3, r3
1348 ; CHECK-NEXT: adds.w r12, r12, r3
1349 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1350 ; CHECK-NEXT: vmov.u16 r3, q0[5]
1351 ; CHECK-NEXT: sxtb r3, r3
1352 ; CHECK-NEXT: adds.w r12, r12, r3
1353 ; CHECK-NEXT: adc.w r2, r2, r3, asr #31
1354 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1355 ; CHECK-NEXT: sxtb r3, r3
1356 ; CHECK-NEXT: adds.w r12, r12, r3
1357 ; CHECK-NEXT: adc.w lr, r2, r3, asr #31
1358 ; CHECK-NEXT: vmov.u16 r3, q0[7]
1359 ; CHECK-NEXT: sxtb r3, r3
1360 ; CHECK-NEXT: adds.w r2, r12, r3
1361 ; CHECK-NEXT: adc.w r3, lr, r3, asr #31
1362 ; CHECK-NEXT: adds r0, r0, r2
1363 ; CHECK-NEXT: adcs r1, r3
1364 ; CHECK-NEXT: pop {r7, pc}
1366 %xx = sext <8 x i8> %x to <8 x i64>
1367 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
; Add-reduction of a <4 x i8> vector zero-extended to <4 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly masks every 32-bit lane with 0xff (the zero-extension)
; and then sums the lanes with the long reduction vaddlva.u32, whose scalar
; operands are the r0, r1 register pair.
1372 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) {
1373 ; CHECK-LABEL: add_v4i8_v4i64_acc_zext:
1374 ; CHECK: @ %bb.0: @ %entry
1375 ; CHECK-NEXT: vmov.i32 q1, #0xff
1376 ; CHECK-NEXT: vand q0, q0, q1
1377 ; CHECK-NEXT: vaddlva.u32 r0, r1, q0
1380 %xx = zext <4 x i8> %x to <4 x i64>
1381 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Add-reduction of a <4 x i8> vector sign-extended to <4 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly widens the lanes in two signed steps (vmovlb.s8, then
; vmovlb.s16) and sums them with the signed long reduction vaddlva.s32, whose
; scalar operands are the r0, r1 register pair.
1386 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) {
1387 ; CHECK-LABEL: add_v4i8_v4i64_acc_sext:
1388 ; CHECK: @ %bb.0: @ %entry
1389 ; CHECK-NEXT: vmovlb.s8 q0, q0
1390 ; CHECK-NEXT: vmovlb.s16 q0, q0
1391 ; CHECK-NEXT: vaddlva.s32 r0, r1, q0
1394 %xx = sext <4 x i8> %x to <4 x i64>
1395 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
; Add-reduction of a <2 x i8> vector zero-extended to <2 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly masks both 64-bit lanes with 0xff, moves them to
; general-purpose registers, adds the two low words with a plain add, and then
; adds the result into the r0:r1 pair with adds/adc.
1400 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
1401 ; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
1402 ; CHECK: @ %bb.0: @ %entry
1403 ; CHECK-NEXT: vmov.i64 q1, #0xff
1404 ; CHECK-NEXT: vand q0, q0, q1
1405 ; CHECK-NEXT: vmov r2, s2
1406 ; CHECK-NEXT: vmov r3, r12, d0
1407 ; CHECK-NEXT: add r2, r3
1408 ; CHECK-NEXT: adds r0, r0, r2
1409 ; CHECK-NEXT: adc.w r1, r1, r12
1412 %xx = zext <2 x i8> %x to <2 x i64>
1413 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Add-reduction of a <2 x i8> vector sign-extended to <2 x i64>, in a function
; that also takes a scalar i64 %a.
; The expected assembly is scalar: both lanes are moved out of s0 and s2,
; sign-extended from their low byte with sxtb, and their high words formed
; with an arithmetic shift right by 31. The two 64-bit values are summed with
; adds/adc and the result is added into the r0:r1 pair.
1418 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) {
1419 ; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
1420 ; CHECK: @ %bb.0: @ %entry
1421 ; CHECK-NEXT: vmov r2, s0
1422 ; CHECK-NEXT: vmov r3, s2
1423 ; CHECK-NEXT: sxtb r2, r2
1424 ; CHECK-NEXT: asr.w r12, r2, #31
1425 ; CHECK-NEXT: sxtb r3, r3
1426 ; CHECK-NEXT: adds r2, r2, r3
1427 ; CHECK-NEXT: adc.w r3, r12, r3, asr #31
1428 ; CHECK-NEXT: adds r0, r0, r2
1429 ; CHECK-NEXT: adcs r1, r3
1432 %xx = sext <2 x i8> %x to <2 x i64>
1433 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
; Add-reduction of a <2 x i64> vector with no extension, in a function that
; also takes a scalar i64 %a.
; The expected assembly moves both 64-bit lanes (d0 and d1) into
; general-purpose register pairs, sums them with adds/adc, and adds the result
; into the r0:r1 pair.
; lr is used as a scratch register here, and the sequence is bracketed by
; push {r7, lr} / pop {r7, pc}.
1438 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, i64 %a) {
1439 ; CHECK-LABEL: add_v2i64_v2i64_acc:
1440 ; CHECK: @ %bb.0: @ %entry
1441 ; CHECK-NEXT: .save {r7, lr}
1442 ; CHECK-NEXT: push {r7, lr}
1443 ; CHECK-NEXT: vmov lr, r12, d1
1444 ; CHECK-NEXT: vmov r3, r2, d0
1445 ; CHECK-NEXT: adds.w r3, r3, lr
1446 ; CHECK-NEXT: adc.w r2, r2, r12
1447 ; CHECK-NEXT: adds r0, r0, r3
1448 ; CHECK-NEXT: adcs r1, r2
1449 ; CHECK-NEXT: pop {r7, pc}
1451 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
; Declarations of the llvm.vector.reduce.add intrinsics called by the test
; functions in this file, one per vector type, ordered by result type.
1456 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
1457 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
1458 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
1459 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
1460 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
1461 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
1462 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
1463 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
1464 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
1465 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)