1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; Masked add reduction over <4 x i32>: lanes of %x whose matching lane of %b
; equals zero are kept, every other lane is replaced with zero, and the result
; is summed with llvm.vector.reduce.add.v4i32.
; Expected codegen: vpt.i32 (eq against zr) followed by a predicated vaddvt.u32.
4 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %b) {
5 ; CHECK-LABEL: add_v4i32_v4i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vpt.i32 eq, q1, zr
8 ; CHECK-NEXT: vaddvt.u32 r0, q0
11 %c = icmp eq <4 x i32> %b, zeroinitializer
12 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
13 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Masked add reduction with widening: %x is zero-extended from <4 x i32> to
; <4 x i64>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i64.
; Expected codegen: vpt.i32 followed by a predicated vaddlvt.u32 into r0/r1.
17 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %b) {
18 ; CHECK-LABEL: add_v4i32_v4i64_zext:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vpt.i32 eq, q1, zr
21 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
24 %c = icmp eq <4 x i32> %b, zeroinitializer
25 %xx = zext <4 x i32> %x to <4 x i64>
26 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
27 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Signed counterpart of the <4 x i32> -> i64 masked reduction: %x is
; sign-extended to <4 x i64>, lanes where %b is non-zero are replaced with
; zero, and the lanes are summed into an i64.
; Expected codegen: vpt.i32 followed by a predicated vaddlvt.s32 into r0/r1.
31 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %b) {
32 ; CHECK-LABEL: add_v4i32_v4i64_sext:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vpt.i32 eq, q1, zr
35 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
38 %c = icmp eq <4 x i32> %b, zeroinitializer
39 %xx = sext <4 x i32> %x to <4 x i64>
40 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
41 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Two-lane masked reduction: %x is zero-extended from <2 x i32> to <2 x i64>,
; lanes where %b is non-zero are replaced with zero, and the two lanes are
; summed into an i64.
; The assertions record an expansion rather than a predicated reduction:
; the low words are isolated with a 0xffffffff mask, per-lane masks are built
; with scalar cmp/cset/csetm, applied with vand, and the two 64-bit lanes are
; added with adds/adcs.
45 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
46 ; CHECK-LABEL: add_v2i32_v2i64_zext:
47 ; CHECK: @ %bb.0: @ %entry
48 ; CHECK-NEXT: vmov r0, s6
49 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
50 ; CHECK-NEXT: vmov r1, s4
51 ; CHECK-NEXT: vand q0, q0, q2
52 ; CHECK-NEXT: cmp r0, #0
53 ; CHECK-NEXT: cset r0, eq
54 ; CHECK-NEXT: cmp r0, #0
55 ; CHECK-NEXT: csetm r0, ne
56 ; CHECK-NEXT: cmp r1, #0
57 ; CHECK-NEXT: cset r1, eq
58 ; CHECK-NEXT: cmp r1, #0
59 ; CHECK-NEXT: csetm r1, ne
60 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
61 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
62 ; CHECK-NEXT: vand q0, q0, q1
63 ; CHECK-NEXT: vmov r0, r1, d1
64 ; CHECK-NEXT: vmov r2, r3, d0
65 ; CHECK-NEXT: adds r0, r0, r2
66 ; CHECK-NEXT: adcs r1, r3
69 %c = icmp eq <2 x i32> %b, zeroinitializer
70 %xx = zext <2 x i32> %x to <2 x i64>
71 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
72 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Two-lane masked reduction, signed: %x is sign-extended from <2 x i32> to
; <2 x i64>, lanes where %b is non-zero are replaced with zero, and the two
; lanes are summed into an i64.
; The assertions record an expansion: the high words are produced with
; asrs #31, per-lane masks are built with scalar cmp/cset/csetm, applied with
; vand, and the two 64-bit lanes are added with adds/adcs.
76 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %b) {
77 ; CHECK-LABEL: add_v2i32_v2i64_sext:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmov r0, s2
80 ; CHECK-NEXT: vmov r1, s0
81 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
82 ; CHECK-NEXT: asrs r0, r0, #31
83 ; CHECK-NEXT: asrs r1, r1, #31
84 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
85 ; CHECK-NEXT: vmov r0, s6
86 ; CHECK-NEXT: vmov r1, s4
87 ; CHECK-NEXT: cmp r0, #0
88 ; CHECK-NEXT: cset r0, eq
89 ; CHECK-NEXT: cmp r0, #0
90 ; CHECK-NEXT: csetm r0, ne
91 ; CHECK-NEXT: cmp r1, #0
92 ; CHECK-NEXT: cset r1, eq
93 ; CHECK-NEXT: cmp r1, #0
94 ; CHECK-NEXT: csetm r1, ne
95 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
96 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
97 ; CHECK-NEXT: vand q0, q0, q1
98 ; CHECK-NEXT: vmov r0, r1, d1
99 ; CHECK-NEXT: vmov r2, r3, d0
100 ; CHECK-NEXT: adds r0, r0, r2
101 ; CHECK-NEXT: adcs r1, r3
104 %c = icmp eq <2 x i32> %b, zeroinitializer
105 %xx = sext <2 x i32> %x to <2 x i64>
106 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
107 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Masked widening reduction: %x is zero-extended from <8 x i16> to <8 x i32>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i32.
; Expected codegen: vpt.i16 followed by a predicated vaddvt.u16.
111 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %b) {
112 ; CHECK-LABEL: add_v8i16_v8i32_zext:
113 ; CHECK: @ %bb.0: @ %entry
114 ; CHECK-NEXT: vpt.i16 eq, q1, zr
115 ; CHECK-NEXT: vaddvt.u16 r0, q0
118 %c = icmp eq <8 x i16> %b, zeroinitializer
119 %xx = zext <8 x i16> %x to <8 x i32>
120 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
121 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Masked widening reduction, signed: %x is sign-extended from <8 x i16> to
; <8 x i32>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i32.
; Expected codegen: vpt.i16 followed by a predicated vaddvt.s16.
125 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %b) {
126 ; CHECK-LABEL: add_v8i16_v8i32_sext:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: vpt.i16 eq, q1, zr
129 ; CHECK-NEXT: vaddvt.s16 r0, q0
132 %c = icmp eq <8 x i16> %b, zeroinitializer
133 %xx = sext <8 x i16> %x to <8 x i32>
134 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
135 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Masked reduction on a <4 x i16> input: %x is zero-extended to <4 x i32>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i32.
; Expected codegen: both operands are widened with vmovlb.u16, then a vpt.i32
; compare is followed by a predicated vaddvt.u32.
139 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %b) {
140 ; CHECK-LABEL: add_v4i16_v4i32_zext:
141 ; CHECK: @ %bb.0: @ %entry
142 ; CHECK-NEXT: vmovlb.u16 q0, q0
143 ; CHECK-NEXT: vmovlb.u16 q1, q1
144 ; CHECK-NEXT: vpt.i32 eq, q1, zr
145 ; CHECK-NEXT: vaddvt.u32 r0, q0
148 %c = icmp eq <4 x i16> %b, zeroinitializer
149 %xx = zext <4 x i16> %x to <4 x i32>
150 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
151 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Masked reduction on a <4 x i16> input, signed: %x is sign-extended to
; <4 x i32>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i32.
; Expected codegen: %x is widened with vmovlb.s16 and %b with vmovlb.u16, then
; a vpt.i32 compare is followed by a predicated vaddvt.u32.
155 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %b) {
156 ; CHECK-LABEL: add_v4i16_v4i32_sext:
157 ; CHECK: @ %bb.0: @ %entry
158 ; CHECK-NEXT: vmovlb.s16 q0, q0
159 ; CHECK-NEXT: vmovlb.u16 q1, q1
160 ; CHECK-NEXT: vpt.i32 eq, q1, zr
161 ; CHECK-NEXT: vaddvt.u32 r0, q0
164 %c = icmp eq <4 x i16> %b, zeroinitializer
165 %xx = sext <4 x i16> %x to <4 x i32>
166 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
167 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Masked add reduction over <8 x i16> with no widening: lanes of %x where %b
; is non-zero are replaced with zero and the lanes are summed into an i16.
; The return value is declared zeroext, and the expected codegen is vpt.i16,
; a predicated vaddvt.u16, then uxth on the result.
171 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %b) {
172 ; CHECK-LABEL: add_v8i16_v8i16:
173 ; CHECK: @ %bb.0: @ %entry
174 ; CHECK-NEXT: vpt.i16 eq, q1, zr
175 ; CHECK-NEXT: vaddvt.u16 r0, q0
176 ; CHECK-NEXT: uxth r0, r0
179 %c = icmp eq <8 x i16> %b, zeroinitializer
180 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
181 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Masked reduction from <8 x i16> to i64: %x is zero-extended to <8 x i64>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i64.
; The assertions record a two-lanes-at-a-time expansion rather than a single
; predicated reduction: the compare result is materialised with vpsel, p0 is
; read back with vmrs, per-lane masks are built with and/ubfx + rsbs, data
; lanes are extracted with vmov.u16 and masked with vand, and the partial sums
; are accumulated in core registers (adds/adcs). d8/d9 are saved and restored
; around the body.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
185 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %b) {
186 ; CHECK-LABEL: add_v8i16_v8i64_zext:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: .vsave {d8, d9}
189 ; CHECK-NEXT: vpush {d8, d9}
190 ; CHECK-NEXT: vmov.i8 q2, #0x0
191 ; CHECK-NEXT: vmov.i8 q3, #0xff
192 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
193 ; CHECK-NEXT: vpsel q2, q3, q2
194 ; CHECK-NEXT: vmov.u16 r0, q2[2]
195 ; CHECK-NEXT: vmov.u16 r1, q2[0]
196 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
197 ; CHECK-NEXT: vmov.u16 r0, q2[3]
198 ; CHECK-NEXT: vmov.u16 r1, q2[1]
199 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
200 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
201 ; CHECK-NEXT: vmov.i64 q1, #0xffff
202 ; CHECK-NEXT: vmrs r0, p0
203 ; CHECK-NEXT: and r2, r0, #1
204 ; CHECK-NEXT: ubfx r1, r0, #4, #1
205 ; CHECK-NEXT: rsbs r2, r2, #0
206 ; CHECK-NEXT: rsbs r1, r1, #0
207 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
208 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
209 ; CHECK-NEXT: vmov.u16 r1, q0[1]
210 ; CHECK-NEXT: vmov.u16 r2, q0[0]
211 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1
212 ; CHECK-NEXT: vand q4, q4, q1
213 ; CHECK-NEXT: vand q3, q4, q3
214 ; CHECK-NEXT: vmov r12, r2, d7
215 ; CHECK-NEXT: vmov r3, r1, d6
216 ; CHECK-NEXT: orrs r1, r2
217 ; CHECK-NEXT: add.w r2, r3, r12
218 ; CHECK-NEXT: ubfx r3, r0, #12, #1
219 ; CHECK-NEXT: ubfx r0, r0, #8, #1
220 ; CHECK-NEXT: rsbs r3, r3, #0
221 ; CHECK-NEXT: rsbs r0, r0, #0
222 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
223 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r3
224 ; CHECK-NEXT: vmov.u16 r0, q0[3]
225 ; CHECK-NEXT: vmov.u16 r3, q0[2]
226 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0
227 ; CHECK-NEXT: vand q4, q4, q1
228 ; CHECK-NEXT: vand q3, q4, q3
229 ; CHECK-NEXT: vmov r0, r3, d6
230 ; CHECK-NEXT: adds r0, r0, r2
231 ; CHECK-NEXT: adcs r1, r3
232 ; CHECK-NEXT: vmov r2, r3, d7
233 ; CHECK-NEXT: adds.w r12, r0, r2
234 ; CHECK-NEXT: vmov.u16 r2, q2[6]
235 ; CHECK-NEXT: adcs r1, r3
236 ; CHECK-NEXT: vmov.u16 r3, q2[4]
237 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
238 ; CHECK-NEXT: vmov.u16 r2, q2[7]
239 ; CHECK-NEXT: vmov.u16 r3, q2[5]
240 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
241 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
242 ; CHECK-NEXT: vmrs r2, p0
243 ; CHECK-NEXT: and r0, r2, #1
244 ; CHECK-NEXT: ubfx r3, r2, #4, #1
245 ; CHECK-NEXT: rsbs r0, r0, #0
246 ; CHECK-NEXT: rsbs r3, r3, #0
247 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
248 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
249 ; CHECK-NEXT: vmov.u16 r0, q0[5]
250 ; CHECK-NEXT: vmov.u16 r3, q0[4]
251 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
252 ; CHECK-NEXT: vand q3, q3, q1
253 ; CHECK-NEXT: vand q2, q3, q2
254 ; CHECK-NEXT: vmov r0, r3, d4
255 ; CHECK-NEXT: adds.w r12, r12, r0
256 ; CHECK-NEXT: adcs r1, r3
257 ; CHECK-NEXT: vmov r3, r0, d5
258 ; CHECK-NEXT: adds.w r3, r3, r12
259 ; CHECK-NEXT: adcs r0, r1
260 ; CHECK-NEXT: ubfx r1, r2, #12, #1
261 ; CHECK-NEXT: ubfx r2, r2, #8, #1
262 ; CHECK-NEXT: rsbs r1, r1, #0
263 ; CHECK-NEXT: rsbs r2, r2, #0
264 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
265 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
266 ; CHECK-NEXT: vmov.u16 r1, q0[7]
267 ; CHECK-NEXT: vmov.u16 r2, q0[6]
268 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
269 ; CHECK-NEXT: vand q0, q0, q1
270 ; CHECK-NEXT: vand q0, q0, q2
271 ; CHECK-NEXT: vmov r1, r2, d0
272 ; CHECK-NEXT: adds r1, r1, r3
273 ; CHECK-NEXT: adcs r2, r0
274 ; CHECK-NEXT: vmov r0, r3, d1
275 ; CHECK-NEXT: adds r0, r0, r1
276 ; CHECK-NEXT: adc.w r1, r2, r3
277 ; CHECK-NEXT: vpop {d8, d9}
280 %c = icmp eq <8 x i16> %b, zeroinitializer
281 %xx = zext <8 x i16> %x to <8 x i64>
282 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
283 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Masked reduction from <8 x i16> to i64, signed: %x is sign-extended to
; <8 x i64>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i64.
; The assertions record a two-lanes-at-a-time expansion rather than a single
; predicated reduction: the compare result is materialised with vpsel, p0 is
; read back with vmrs, per-lane masks are built with and/ubfx + rsbs/rsb.w,
; data lanes are extracted with vmov.s16 and their high words produced with
; asrs #31, the masks are applied with vand, and the partial sums are
; accumulated in core registers (adds/adcs/adc.w).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
287 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
288 ; CHECK-LABEL: add_v8i16_v8i64_sext:
289 ; CHECK: @ %bb.0: @ %entry
290 ; CHECK-NEXT: vmov.i8 q2, #0x0
291 ; CHECK-NEXT: vmov.i8 q3, #0xff
292 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
293 ; CHECK-NEXT: vpsel q1, q3, q2
294 ; CHECK-NEXT: vmov.u16 r0, q1[2]
295 ; CHECK-NEXT: vmov.u16 r1, q1[0]
296 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
297 ; CHECK-NEXT: vmov.u16 r0, q1[3]
298 ; CHECK-NEXT: vmov.u16 r1, q1[1]
299 ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
300 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
301 ; CHECK-NEXT: vmrs r0, p0
302 ; CHECK-NEXT: and r2, r0, #1
303 ; CHECK-NEXT: ubfx r1, r0, #4, #1
304 ; CHECK-NEXT: rsbs r2, r2, #0
305 ; CHECK-NEXT: rsbs r1, r1, #0
306 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
307 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
308 ; CHECK-NEXT: vmov.s16 r1, q0[1]
309 ; CHECK-NEXT: vmov.s16 r2, q0[0]
310 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
311 ; CHECK-NEXT: asrs r1, r1, #31
312 ; CHECK-NEXT: asrs r2, r2, #31
313 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
314 ; CHECK-NEXT: vand q2, q3, q2
315 ; CHECK-NEXT: vmov r1, r12, d5
316 ; CHECK-NEXT: vmov r3, r2, d4
317 ; CHECK-NEXT: adds r1, r1, r3
318 ; CHECK-NEXT: ubfx r3, r0, #12, #1
319 ; CHECK-NEXT: ubfx r0, r0, #8, #1
320 ; CHECK-NEXT: rsb.w r3, r3, #0
321 ; CHECK-NEXT: rsb.w r0, r0, #0
322 ; CHECK-NEXT: adc.w r2, r2, r12
323 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
324 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
325 ; CHECK-NEXT: vmov.s16 r0, q0[3]
326 ; CHECK-NEXT: vmov.s16 r3, q0[2]
327 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
328 ; CHECK-NEXT: asrs r0, r0, #31
329 ; CHECK-NEXT: asrs r3, r3, #31
330 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r0
331 ; CHECK-NEXT: vand q2, q3, q2
332 ; CHECK-NEXT: vmov r0, r3, d4
333 ; CHECK-NEXT: adds r0, r0, r1
334 ; CHECK-NEXT: adc.w r1, r2, r3
335 ; CHECK-NEXT: vmov r2, r3, d5
336 ; CHECK-NEXT: adds.w r12, r0, r2
337 ; CHECK-NEXT: vmov.u16 r2, q1[6]
338 ; CHECK-NEXT: adcs r1, r3
339 ; CHECK-NEXT: vmov.u16 r3, q1[4]
340 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
341 ; CHECK-NEXT: vmov.u16 r2, q1[7]
342 ; CHECK-NEXT: vmov.u16 r3, q1[5]
343 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
344 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
345 ; CHECK-NEXT: vmrs r2, p0
346 ; CHECK-NEXT: and r0, r2, #1
347 ; CHECK-NEXT: ubfx r3, r2, #4, #1
348 ; CHECK-NEXT: rsbs r0, r0, #0
349 ; CHECK-NEXT: rsbs r3, r3, #0
350 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
351 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
352 ; CHECK-NEXT: vmov.s16 r0, q0[5]
353 ; CHECK-NEXT: vmov.s16 r3, q0[4]
354 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
355 ; CHECK-NEXT: asrs r0, r0, #31
356 ; CHECK-NEXT: asrs r3, r3, #31
357 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r0
358 ; CHECK-NEXT: vand q1, q2, q1
359 ; CHECK-NEXT: vmov r0, r3, d2
360 ; CHECK-NEXT: adds.w r12, r12, r0
361 ; CHECK-NEXT: adcs r1, r3
362 ; CHECK-NEXT: vmov r3, r0, d3
363 ; CHECK-NEXT: adds.w r3, r3, r12
364 ; CHECK-NEXT: adcs r0, r1
365 ; CHECK-NEXT: ubfx r1, r2, #12, #1
366 ; CHECK-NEXT: ubfx r2, r2, #8, #1
367 ; CHECK-NEXT: rsbs r1, r1, #0
368 ; CHECK-NEXT: rsbs r2, r2, #0
369 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
370 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
371 ; CHECK-NEXT: vmov.s16 r1, q0[7]
372 ; CHECK-NEXT: vmov.s16 r2, q0[6]
373 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
374 ; CHECK-NEXT: asrs r1, r1, #31
375 ; CHECK-NEXT: asrs r2, r2, #31
376 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
377 ; CHECK-NEXT: vand q0, q0, q1
378 ; CHECK-NEXT: vmov r1, r2, d0
379 ; CHECK-NEXT: adds r1, r1, r3
380 ; CHECK-NEXT: adcs r2, r0
381 ; CHECK-NEXT: vmov r0, r3, d1
382 ; CHECK-NEXT: adds r0, r0, r1
383 ; CHECK-NEXT: adc.w r1, r2, r3
386 %c = icmp eq <8 x i16> %b, zeroinitializer
387 %xx = sext <8 x i16> %x to <8 x i64>
388 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
389 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Masked reduction from <4 x i16> to i64: %x is zero-extended to <4 x i64>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i64.
; Expected codegen: both operands are widened with vmovlb.u16, then a vpt.i32
; compare is followed by a predicated vaddlvt.u32 into r0/r1.
393 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %b) {
394 ; CHECK-LABEL: add_v4i16_v4i64_zext:
395 ; CHECK: @ %bb.0: @ %entry
396 ; CHECK-NEXT: vmovlb.u16 q0, q0
397 ; CHECK-NEXT: vmovlb.u16 q1, q1
398 ; CHECK-NEXT: vpt.i32 eq, q1, zr
399 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
402 %c = icmp eq <4 x i16> %b, zeroinitializer
403 %xx = zext <4 x i16> %x to <4 x i64>
404 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
405 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Masked reduction from <4 x i16> to i64, signed: %x is sign-extended to
; <4 x i64>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i64.
; Expected codegen: %x is widened with vmovlb.s16 and %b with vmovlb.u16, then
; a vpt.i32 compare is followed by a predicated vaddlvt.s32 into r0/r1.
409 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %b) {
410 ; CHECK-LABEL: add_v4i16_v4i64_sext:
411 ; CHECK: @ %bb.0: @ %entry
412 ; CHECK-NEXT: vmovlb.s16 q0, q0
413 ; CHECK-NEXT: vmovlb.u16 q1, q1
414 ; CHECK-NEXT: vpt.i32 eq, q1, zr
415 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
418 %c = icmp eq <4 x i16> %b, zeroinitializer
419 %xx = sext <4 x i16> %x to <4 x i64>
420 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
421 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Two-lane masked reduction from <2 x i16> to i64: %x is zero-extended to
; <2 x i64>, lanes where %b is non-zero are replaced with zero, and the two
; lanes are summed into an i64.
; The assertions record an expansion: both operands are masked with a
; 0xffff-per-64-bit-lane constant, per-lane masks are built with scalar
; cmp/cset/csetm and applied with vand, and the final combine is a plain
; add of the low words plus orrs of the high words.
425 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
426 ; CHECK-LABEL: add_v2i16_v2i64_zext:
427 ; CHECK: @ %bb.0: @ %entry
428 ; CHECK-NEXT: vmov.i64 q2, #0xffff
429 ; CHECK-NEXT: vand q1, q1, q2
430 ; CHECK-NEXT: vand q0, q0, q2
431 ; CHECK-NEXT: vmov r0, s6
432 ; CHECK-NEXT: vmov r1, s4
433 ; CHECK-NEXT: cmp r0, #0
434 ; CHECK-NEXT: cset r0, eq
435 ; CHECK-NEXT: cmp r0, #0
436 ; CHECK-NEXT: csetm r0, ne
437 ; CHECK-NEXT: cmp r1, #0
438 ; CHECK-NEXT: cset r1, eq
439 ; CHECK-NEXT: cmp r1, #0
440 ; CHECK-NEXT: csetm r1, ne
441 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
442 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
443 ; CHECK-NEXT: vand q0, q0, q1
444 ; CHECK-NEXT: vmov r0, r1, d1
445 ; CHECK-NEXT: vmov r2, r3, d0
446 ; CHECK-NEXT: add r0, r2
447 ; CHECK-NEXT: orrs r1, r3
450 %c = icmp eq <2 x i16> %b, zeroinitializer
451 %xx = zext <2 x i16> %x to <2 x i64>
452 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
453 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Two-lane masked reduction from <2 x i16> to i64, signed: %x is sign-extended
; to <2 x i64>, lanes where %b is non-zero are replaced with zero, and the two
; lanes are summed into an i64.
; The assertions record an expansion: %b is masked with 0xffff before the
; scalar cmp/cset/csetm mask construction, the %x lanes are sign-extended
; with sxth and their high words produced with asrs #31, the masks are applied
; with vand, and the two 64-bit lanes are added with adds/adcs.
457 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %b) {
458 ; CHECK-LABEL: add_v2i16_v2i64_sext:
459 ; CHECK: @ %bb.0: @ %entry
460 ; CHECK-NEXT: vmov.i32 q2, #0xffff
461 ; CHECK-NEXT: vand q1, q1, q2
462 ; CHECK-NEXT: vmov r0, s6
463 ; CHECK-NEXT: vmov r1, s4
464 ; CHECK-NEXT: cmp r0, #0
465 ; CHECK-NEXT: cset r0, eq
466 ; CHECK-NEXT: cmp r0, #0
467 ; CHECK-NEXT: csetm r0, ne
468 ; CHECK-NEXT: cmp r1, #0
469 ; CHECK-NEXT: cset r1, eq
470 ; CHECK-NEXT: cmp r1, #0
471 ; CHECK-NEXT: csetm r1, ne
472 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
473 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
474 ; CHECK-NEXT: vmov r0, s2
475 ; CHECK-NEXT: vmov r1, s0
476 ; CHECK-NEXT: sxth r0, r0
477 ; CHECK-NEXT: sxth r1, r1
478 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
479 ; CHECK-NEXT: asrs r0, r0, #31
480 ; CHECK-NEXT: asrs r1, r1, #31
481 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
482 ; CHECK-NEXT: vand q0, q0, q1
483 ; CHECK-NEXT: vmov r0, r1, d1
484 ; CHECK-NEXT: vmov r2, r3, d0
485 ; CHECK-NEXT: adds r0, r0, r2
486 ; CHECK-NEXT: adcs r1, r3
489 %c = icmp eq <2 x i16> %b, zeroinitializer
490 %xx = sext <2 x i16> %x to <2 x i64>
491 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
492 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Masked widening reduction: %x is zero-extended from <16 x i8> to <16 x i32>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i32.
; Expected codegen: vpt.i8 followed by a predicated vaddvt.u8.
496 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %b) {
497 ; CHECK-LABEL: add_v16i8_v16i32_zext:
498 ; CHECK: @ %bb.0: @ %entry
499 ; CHECK-NEXT: vpt.i8 eq, q1, zr
500 ; CHECK-NEXT: vaddvt.u8 r0, q0
503 %c = icmp eq <16 x i8> %b, zeroinitializer
504 %xx = zext <16 x i8> %x to <16 x i32>
505 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
506 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Masked widening reduction, signed: %x is sign-extended from <16 x i8> to
; <16 x i32>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i32.
; Expected codegen: vpt.i8 followed by a predicated vaddvt.s8.
510 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %b) {
511 ; CHECK-LABEL: add_v16i8_v16i32_sext:
512 ; CHECK: @ %bb.0: @ %entry
513 ; CHECK-NEXT: vpt.i8 eq, q1, zr
514 ; CHECK-NEXT: vaddvt.s8 r0, q0
517 %c = icmp eq <16 x i8> %b, zeroinitializer
518 %xx = sext <16 x i8> %x to <16 x i32>
519 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
520 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Masked reduction on a <8 x i8> input: %x is zero-extended to <8 x i32>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i32.
; Expected codegen: both operands are widened with vmovlb.u8, then a vpt.i16
; compare is followed by a predicated vaddvt.u16.
524 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %b) {
525 ; CHECK-LABEL: add_v8i8_v8i32_zext:
526 ; CHECK: @ %bb.0: @ %entry
527 ; CHECK-NEXT: vmovlb.u8 q0, q0
528 ; CHECK-NEXT: vmovlb.u8 q1, q1
529 ; CHECK-NEXT: vpt.i16 eq, q1, zr
530 ; CHECK-NEXT: vaddvt.u16 r0, q0
533 %c = icmp eq <8 x i8> %b, zeroinitializer
534 %xx = zext <8 x i8> %x to <8 x i32>
535 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
536 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Masked reduction on a <8 x i8> input, signed: %x is sign-extended to
; <8 x i32>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i32.
; Expected codegen: %x is widened with vmovlb.s8 and %b with vmovlb.u8, then a
; vpt.i16 compare is followed by a predicated vaddvt.s16.
540 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %b) {
541 ; CHECK-LABEL: add_v8i8_v8i32_sext:
542 ; CHECK: @ %bb.0: @ %entry
543 ; CHECK-NEXT: vmovlb.s8 q0, q0
544 ; CHECK-NEXT: vmovlb.u8 q1, q1
545 ; CHECK-NEXT: vpt.i16 eq, q1, zr
546 ; CHECK-NEXT: vaddvt.s16 r0, q0
549 %c = icmp eq <8 x i8> %b, zeroinitializer
550 %xx = sext <8 x i8> %x to <8 x i32>
551 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
552 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Masked reduction on a <4 x i8> input: %x is zero-extended to <4 x i32>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i32.
; Expected codegen: both operands are masked with a 0xff-per-32-bit-lane
; constant (vmov.i32 + vand), then a vpt.i32 compare is followed by a
; predicated vaddvt.u32.
556 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %b) {
557 ; CHECK-LABEL: add_v4i8_v4i32_zext:
558 ; CHECK: @ %bb.0: @ %entry
559 ; CHECK-NEXT: vmov.i32 q2, #0xff
560 ; CHECK-NEXT: vand q0, q0, q2
561 ; CHECK-NEXT: vand q1, q1, q2
562 ; CHECK-NEXT: vpt.i32 eq, q1, zr
563 ; CHECK-NEXT: vaddvt.u32 r0, q0
566 %c = icmp eq <4 x i8> %b, zeroinitializer
567 %xx = zext <4 x i8> %x to <4 x i32>
568 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
569 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Masked reduction on a <4 x i8> input, signed: %x is sign-extended to
; <4 x i32>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i32.
; Expected codegen: %x is widened in two steps (vmovlb.s8 then vmovlb.s16),
; %b is masked with 0xff per 32-bit lane, then a vpt.i32 compare is followed
; by a predicated vaddvt.u32.
573 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %b) {
574 ; CHECK-LABEL: add_v4i8_v4i32_sext:
575 ; CHECK: @ %bb.0: @ %entry
576 ; CHECK-NEXT: vmovlb.s8 q0, q0
577 ; CHECK-NEXT: vmov.i32 q2, #0xff
578 ; CHECK-NEXT: vand q1, q1, q2
579 ; CHECK-NEXT: vmovlb.s16 q0, q0
580 ; CHECK-NEXT: vpt.i32 eq, q1, zr
581 ; CHECK-NEXT: vaddvt.u32 r0, q0
584 %c = icmp eq <4 x i8> %b, zeroinitializer
585 %xx = sext <4 x i8> %x to <4 x i32>
586 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
587 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Masked widening reduction to i16: %x is zero-extended from <16 x i8> to
; <16 x i16>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i16.
; The return value is declared zeroext, and the expected codegen is vpt.i8,
; a predicated vaddvt.u8, then uxth on the result.
591 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %b) {
592 ; CHECK-LABEL: add_v16i8_v16i16_zext:
593 ; CHECK: @ %bb.0: @ %entry
594 ; CHECK-NEXT: vpt.i8 eq, q1, zr
595 ; CHECK-NEXT: vaddvt.u8 r0, q0
596 ; CHECK-NEXT: uxth r0, r0
599 %c = icmp eq <16 x i8> %b, zeroinitializer
600 %xx = zext <16 x i8> %x to <16 x i16>
601 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
602 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Masked widening reduction to i16, signed: %x is sign-extended from
; <16 x i8> to <16 x i16>, lanes where %b is non-zero are replaced with zero,
; and the lanes are summed into an i16.
; The return value is declared signext, and the expected codegen is vpt.i8,
; a predicated vaddvt.s8, then sxth on the result.
606 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %b) {
607 ; CHECK-LABEL: add_v16i8_v16i16_sext:
608 ; CHECK: @ %bb.0: @ %entry
609 ; CHECK-NEXT: vpt.i8 eq, q1, zr
610 ; CHECK-NEXT: vaddvt.s8 r0, q0
611 ; CHECK-NEXT: sxth r0, r0
614 %c = icmp eq <16 x i8> %b, zeroinitializer
615 %xx = sext <16 x i8> %x to <16 x i16>
616 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
617 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Masked reduction on a <8 x i8> input to i16: %x is zero-extended to
; <8 x i16>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i16.
; The return value is declared zeroext. Expected codegen: both operands are
; widened with vmovlb.u8, then vpt.i16, a predicated vaddvt.u16, and uxth.
621 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %b) {
622 ; CHECK-LABEL: add_v8i8_v8i16_zext:
623 ; CHECK: @ %bb.0: @ %entry
624 ; CHECK-NEXT: vmovlb.u8 q0, q0
625 ; CHECK-NEXT: vmovlb.u8 q1, q1
626 ; CHECK-NEXT: vpt.i16 eq, q1, zr
627 ; CHECK-NEXT: vaddvt.u16 r0, q0
628 ; CHECK-NEXT: uxth r0, r0
631 %c = icmp eq <8 x i8> %b, zeroinitializer
632 %xx = zext <8 x i8> %x to <8 x i16>
633 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
634 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Masked reduction on a <8 x i8> input to i16, signed: %x is sign-extended to
; <8 x i16>, lanes where %b is non-zero are replaced with zero, and the lanes
; are summed into an i16.
; The return value is declared signext. Expected codegen: %x is widened with
; vmovlb.s8 and %b with vmovlb.u8, then vpt.i16, a predicated vaddvt.u16, and
; sxth on the result.
638 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %b) {
639 ; CHECK-LABEL: add_v8i8_v8i16_sext:
640 ; CHECK: @ %bb.0: @ %entry
641 ; CHECK-NEXT: vmovlb.s8 q0, q0
642 ; CHECK-NEXT: vmovlb.u8 q1, q1
643 ; CHECK-NEXT: vpt.i16 eq, q1, zr
644 ; CHECK-NEXT: vaddvt.u16 r0, q0
645 ; CHECK-NEXT: sxth r0, r0
648 %c = icmp eq <8 x i8> %b, zeroinitializer
649 %xx = sext <8 x i8> %x to <8 x i16>
650 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
651 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Masked add reduction over <16 x i8> with no widening: lanes of %x where %b
; is non-zero are replaced with zero and the lanes are summed into an i8.
; The return value is declared zeroext, and the expected codegen is vpt.i8,
; a predicated vaddvt.u8, then uxtb on the result.
655 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %b) {
656 ; CHECK-LABEL: add_v16i8_v16i8:
657 ; CHECK: @ %bb.0: @ %entry
658 ; CHECK-NEXT: vpt.i8 eq, q1, zr
659 ; CHECK-NEXT: vaddvt.u8 r0, q0
660 ; CHECK-NEXT: uxtb r0, r0
663 %c = icmp eq <16 x i8> %b, zeroinitializer
664 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
665 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
; Masked reduction from <16 x i8> to i64: %x is zero-extended to <16 x i64>,
; lanes where %b is non-zero are replaced with zero, and the lanes are summed
; into an i64.
; The assertions record a two-lanes-at-a-time expansion rather than a single
; predicated reduction: the i8 compare result is materialised with vpsel and
; re-widened in stages (i8 -> i16 -> i32 lanes via vmov/vcmp/vpsel), p0 is
; read back with vmrs, per-lane masks are built with and/ubfx + rsbs, data
; lanes are extracted with vmov.u8 and masked with vand (0xff constant and
; lane mask), and the partial sums are accumulated in core registers
; (adds/adcs/adc.w). r7/lr and d8-d15 are saved and restored around the body.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
669 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %b) {
670 ; CHECK-LABEL: add_v16i8_v16i64_zext:
671 ; CHECK: @ %bb.0: @ %entry
672 ; CHECK-NEXT: .save {r7, lr}
673 ; CHECK-NEXT: push {r7, lr}
674 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
675 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
676 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
677 ; CHECK-NEXT: vmov.i8 q2, #0x0
678 ; CHECK-NEXT: vmov.i8 q3, #0xff
679 ; CHECK-NEXT: vpsel q4, q3, q2
680 ; CHECK-NEXT: vmov.u8 r0, q4[0]
681 ; CHECK-NEXT: vmov.16 q1[0], r0
682 ; CHECK-NEXT: vmov.u8 r0, q4[1]
683 ; CHECK-NEXT: vmov.16 q1[1], r0
684 ; CHECK-NEXT: vmov.u8 r0, q4[2]
685 ; CHECK-NEXT: vmov.16 q1[2], r0
686 ; CHECK-NEXT: vmov.u8 r0, q4[3]
687 ; CHECK-NEXT: vmov.16 q1[3], r0
688 ; CHECK-NEXT: vmov.u8 r0, q4[4]
689 ; CHECK-NEXT: vmov.16 q1[4], r0
690 ; CHECK-NEXT: vmov.u8 r0, q4[5]
691 ; CHECK-NEXT: vmov.16 q1[5], r0
692 ; CHECK-NEXT: vmov.u8 r0, q4[6]
693 ; CHECK-NEXT: vmov.16 q1[6], r0
694 ; CHECK-NEXT: vmov.u8 r0, q4[7]
695 ; CHECK-NEXT: vmov.16 q1[7], r0
696 ; CHECK-NEXT: vcmp.i16 ne, q1, zr
697 ; CHECK-NEXT: vpsel q5, q3, q2
698 ; CHECK-NEXT: vmov.u16 r0, q5[2]
699 ; CHECK-NEXT: vmov.u16 r1, q5[0]
700 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
701 ; CHECK-NEXT: vmov.u16 r0, q5[3]
702 ; CHECK-NEXT: vmov.u16 r1, q5[1]
703 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
704 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
705 ; CHECK-NEXT: vmov.i64 q1, #0xff
706 ; CHECK-NEXT: vmrs r0, p0
707 ; CHECK-NEXT: and r2, r0, #1
708 ; CHECK-NEXT: ubfx r1, r0, #4, #1
709 ; CHECK-NEXT: rsbs r2, r2, #0
710 ; CHECK-NEXT: rsbs r1, r1, #0
711 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
712 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r1
713 ; CHECK-NEXT: vmov.u8 r1, q0[1]
714 ; CHECK-NEXT: vmov.u8 r2, q0[0]
715 ; CHECK-NEXT: vmov q7[2], q7[0], r2, r1
716 ; CHECK-NEXT: vand q7, q7, q1
717 ; CHECK-NEXT: vand q6, q7, q6
718 ; CHECK-NEXT: vmov r12, r2, d13
719 ; CHECK-NEXT: vmov r3, r1, d12
720 ; CHECK-NEXT: orrs r1, r2
721 ; CHECK-NEXT: add.w r2, r3, r12
722 ; CHECK-NEXT: ubfx r3, r0, #12, #1
723 ; CHECK-NEXT: ubfx r0, r0, #8, #1
724 ; CHECK-NEXT: rsbs r3, r3, #0
725 ; CHECK-NEXT: rsbs r0, r0, #0
726 ; CHECK-NEXT: vmov q6[2], q6[0], r0, r3
727 ; CHECK-NEXT: vmov q6[3], q6[1], r0, r3
728 ; CHECK-NEXT: vmov.u8 r0, q0[3]
729 ; CHECK-NEXT: vmov.u8 r3, q0[2]
730 ; CHECK-NEXT: vmov q7[2], q7[0], r3, r0
731 ; CHECK-NEXT: vand q7, q7, q1
732 ; CHECK-NEXT: vand q6, q7, q6
733 ; CHECK-NEXT: vmov r0, r3, d12
734 ; CHECK-NEXT: adds r0, r0, r2
735 ; CHECK-NEXT: adcs r1, r3
736 ; CHECK-NEXT: vmov r2, r3, d13
737 ; CHECK-NEXT: adds.w r12, r0, r2
738 ; CHECK-NEXT: vmov.u16 r2, q5[6]
739 ; CHECK-NEXT: adcs r1, r3
740 ; CHECK-NEXT: vmov.u16 r3, q5[4]
741 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
742 ; CHECK-NEXT: vmov.u16 r2, q5[7]
743 ; CHECK-NEXT: vmov.u16 r3, q5[5]
744 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
745 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
746 ; CHECK-NEXT: vmrs r2, p0
747 ; CHECK-NEXT: and r0, r2, #1
748 ; CHECK-NEXT: ubfx r3, r2, #4, #1
749 ; CHECK-NEXT: rsbs r0, r0, #0
750 ; CHECK-NEXT: rsbs r3, r3, #0
751 ; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
752 ; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
753 ; CHECK-NEXT: vmov.u8 r0, q0[5]
754 ; CHECK-NEXT: vmov.u8 r3, q0[4]
755 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r0
756 ; CHECK-NEXT: vand q6, q6, q1
757 ; CHECK-NEXT: vand q5, q6, q5
758 ; CHECK-NEXT: vmov r0, r3, d10
759 ; CHECK-NEXT: adds.w r12, r12, r0
760 ; CHECK-NEXT: adcs r1, r3
761 ; CHECK-NEXT: vmov r3, r0, d11
762 ; CHECK-NEXT: adds.w r3, r3, r12
763 ; CHECK-NEXT: adcs r0, r1
764 ; CHECK-NEXT: ubfx r1, r2, #12, #1
765 ; CHECK-NEXT: ubfx r2, r2, #8, #1
766 ; CHECK-NEXT: rsbs r1, r1, #0
767 ; CHECK-NEXT: rsbs r2, r2, #0
768 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r1
769 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r1
770 ; CHECK-NEXT: vmov.u8 r1, q0[7]
771 ; CHECK-NEXT: vmov.u8 r2, q0[6]
772 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
773 ; CHECK-NEXT: vand q6, q6, q1
774 ; CHECK-NEXT: vand q5, q6, q5
775 ; CHECK-NEXT: vmov r1, r2, d10
776 ; CHECK-NEXT: adds r1, r1, r3
777 ; CHECK-NEXT: adcs r2, r0
778 ; CHECK-NEXT: vmov r0, r3, d11
779 ; CHECK-NEXT: adds.w r12, r1, r0
780 ; CHECK-NEXT: adc.w r1, r2, r3
781 ; CHECK-NEXT: vmov.u8 r2, q4[8]
782 ; CHECK-NEXT: vmov.16 q5[0], r2
783 ; CHECK-NEXT: vmov.u8 r2, q4[9]
784 ; CHECK-NEXT: vmov.16 q5[1], r2
785 ; CHECK-NEXT: vmov.u8 r2, q4[10]
786 ; CHECK-NEXT: vmov.16 q5[2], r2
787 ; CHECK-NEXT: vmov.u8 r2, q4[11]
788 ; CHECK-NEXT: vmov.16 q5[3], r2
789 ; CHECK-NEXT: vmov.u8 r2, q4[12]
790 ; CHECK-NEXT: vmov.16 q5[4], r2
791 ; CHECK-NEXT: vmov.u8 r2, q4[13]
792 ; CHECK-NEXT: vmov.16 q5[5], r2
793 ; CHECK-NEXT: vmov.u8 r2, q4[14]
794 ; CHECK-NEXT: vmov.16 q5[6], r2
795 ; CHECK-NEXT: vmov.u8 r2, q4[15]
796 ; CHECK-NEXT: vmov.16 q5[7], r2
797 ; CHECK-NEXT: vcmp.i16 ne, q5, zr
798 ; CHECK-NEXT: vpsel q2, q3, q2
799 ; CHECK-NEXT: vmov.u16 r2, q2[2]
800 ; CHECK-NEXT: vmov.u16 r3, q2[0]
801 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
802 ; CHECK-NEXT: vmov.u16 r2, q2[3]
803 ; CHECK-NEXT: vmov.u16 r3, q2[1]
804 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
805 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
806 ; CHECK-NEXT: vmrs r2, p0
807 ; CHECK-NEXT: and r0, r2, #1
808 ; CHECK-NEXT: ubfx r3, r2, #4, #1
809 ; CHECK-NEXT: rsbs r0, r0, #0
810 ; CHECK-NEXT: rsbs r3, r3, #0
811 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
812 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r3
813 ; CHECK-NEXT: vmov.u8 r0, q0[9]
814 ; CHECK-NEXT: vmov.u8 r3, q0[8]
815 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0
816 ; CHECK-NEXT: vand q4, q4, q1
817 ; CHECK-NEXT: vand q3, q4, q3
818 ; CHECK-NEXT: vmov r0, r3, d6
819 ; CHECK-NEXT: adds.w r12, r12, r0
820 ; CHECK-NEXT: adcs r1, r3
821 ; CHECK-NEXT: vmov r3, r0, d7
822 ; CHECK-NEXT: adds.w r3, r3, r12
823 ; CHECK-NEXT: adcs r0, r1
824 ; CHECK-NEXT: ubfx r1, r2, #12, #1
825 ; CHECK-NEXT: ubfx r2, r2, #8, #1
826 ; CHECK-NEXT: rsbs r1, r1, #0
827 ; CHECK-NEXT: rsbs r2, r2, #0
828 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
829 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
830 ; CHECK-NEXT: vmov.u8 r1, q0[11]
831 ; CHECK-NEXT: vmov.u8 r2, q0[10]
832 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1
833 ; CHECK-NEXT: vand q4, q4, q1
834 ; CHECK-NEXT: vand q3, q4, q3
835 ; CHECK-NEXT: vmov r1, r2, d6
836 ; CHECK-NEXT: adds r1, r1, r3
837 ; CHECK-NEXT: adcs r0, r2
838 ; CHECK-NEXT: vmov r2, r3, d7
839 ; CHECK-NEXT: adds r1, r1, r2
840 ; CHECK-NEXT: vmov.u16 r2, q2[6]
841 ; CHECK-NEXT: adc.w r12, r0, r3
842 ; CHECK-NEXT: vmov.u16 r3, q2[4]
843 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
844 ; CHECK-NEXT: vmov.u16 r2, q2[7]
845 ; CHECK-NEXT: vmov.u16 r3, q2[5]
846 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
847 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
848 ; CHECK-NEXT: vmrs r2, p0
849 ; CHECK-NEXT: and r0, r2, #1
850 ; CHECK-NEXT: ubfx r3, r2, #4, #1
851 ; CHECK-NEXT: rsbs r0, r0, #0
852 ; CHECK-NEXT: rsbs r3, r3, #0
853 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
854 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
855 ; CHECK-NEXT: vmov.u8 r0, q0[13]
856 ; CHECK-NEXT: vmov.u8 r3, q0[12]
857 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
858 ; CHECK-NEXT: vand q3, q3, q1
859 ; CHECK-NEXT: vand q2, q3, q2
860 ; CHECK-NEXT: vmov r0, r3, d4
861 ; CHECK-NEXT: adds.w lr, r1, r0
862 ; CHECK-NEXT: adc.w r1, r12, r3
863 ; CHECK-NEXT: vmov r3, r0, d5
864 ; CHECK-NEXT: adds.w r3, r3, lr
865 ; CHECK-NEXT: adcs r0, r1
866 ; CHECK-NEXT: ubfx r1, r2, #12, #1
867 ; CHECK-NEXT: ubfx r2, r2, #8, #1
868 ; CHECK-NEXT: rsbs r1, r1, #0
869 ; CHECK-NEXT: rsbs r2, r2, #0
870 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
871 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
872 ; CHECK-NEXT: vmov.u8 r1, q0[15]
873 ; CHECK-NEXT: vmov.u8 r2, q0[14]
874 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
875 ; CHECK-NEXT: vand q0, q0, q1
876 ; CHECK-NEXT: vand q0, q0, q2
877 ; CHECK-NEXT: vmov r1, r2, d0
878 ; CHECK-NEXT: adds r1, r1, r3
879 ; CHECK-NEXT: adcs r2, r0
880 ; CHECK-NEXT: vmov r0, r3, d1
881 ; CHECK-NEXT: adds r0, r0, r1
882 ; CHECK-NEXT: adc.w r1, r2, r3
883 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
884 ; CHECK-NEXT: pop {r7, pc}
886 %c = icmp eq <16 x i8> %b, zeroinitializer
887 %xx = zext <16 x i8> %x to <16 x i64>
888 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
889 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction of <16 x i8> sign-extended to i64 lanes.
; A lane of %x is kept when the matching lane of %b compares equal to zero and
; is replaced by zero otherwise; the selected <16 x i64> vector is then summed
; with llvm.vector.reduce.add.v16i64.
; The expected code does not use a single predicated reduction instruction
; here: the byte mask is built with vcmp/vpsel, narrowed step by step
; (i8 -> i16 -> i32 lanes), read from p0 with vmrs, and turned into two-lane
; 64-bit masks (bits 0/4 and 8/12 of the predicate). Each pair of source bytes
; is sign-extended (vmov.s8 plus asrs #31 for the high word), masked with vand
; and accumulated in core registers with adds/adc pairs.
; d8-d13 are saved on entry and restored before returning.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them instead of editing them by hand.
893 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
894 ; CHECK-LABEL: add_v16i8_v16i64_sext:
895 ; CHECK: @ %bb.0: @ %entry
896 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
897 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
898 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
899 ; CHECK-NEXT: vmov.i8 q1, #0x0
900 ; CHECK-NEXT: vmov.i8 q2, #0xff
901 ; CHECK-NEXT: vpsel q3, q2, q1
902 ; CHECK-NEXT: vmov.u8 r0, q3[0]
903 ; CHECK-NEXT: vmov.16 q4[0], r0
904 ; CHECK-NEXT: vmov.u8 r0, q3[1]
905 ; CHECK-NEXT: vmov.16 q4[1], r0
906 ; CHECK-NEXT: vmov.u8 r0, q3[2]
907 ; CHECK-NEXT: vmov.16 q4[2], r0
908 ; CHECK-NEXT: vmov.u8 r0, q3[3]
909 ; CHECK-NEXT: vmov.16 q4[3], r0
910 ; CHECK-NEXT: vmov.u8 r0, q3[4]
911 ; CHECK-NEXT: vmov.16 q4[4], r0
912 ; CHECK-NEXT: vmov.u8 r0, q3[5]
913 ; CHECK-NEXT: vmov.16 q4[5], r0
914 ; CHECK-NEXT: vmov.u8 r0, q3[6]
915 ; CHECK-NEXT: vmov.16 q4[6], r0
916 ; CHECK-NEXT: vmov.u8 r0, q3[7]
917 ; CHECK-NEXT: vmov.16 q4[7], r0
918 ; CHECK-NEXT: vcmp.i16 ne, q4, zr
919 ; CHECK-NEXT: vpsel q4, q2, q1
920 ; CHECK-NEXT: vmov.u16 r0, q4[2]
921 ; CHECK-NEXT: vmov.u16 r1, q4[0]
922 ; CHECK-NEXT: vmov q5[2], q5[0], r1, r0
923 ; CHECK-NEXT: vmov.u16 r0, q4[3]
924 ; CHECK-NEXT: vmov.u16 r1, q4[1]
925 ; CHECK-NEXT: vmov q5[3], q5[1], r1, r0
926 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
927 ; CHECK-NEXT: vmrs r0, p0
928 ; CHECK-NEXT: and r2, r0, #1
929 ; CHECK-NEXT: ubfx r1, r0, #4, #1
930 ; CHECK-NEXT: rsbs r2, r2, #0
931 ; CHECK-NEXT: rsbs r1, r1, #0
932 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r1
933 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r1
934 ; CHECK-NEXT: vmov.s8 r1, q0[1]
935 ; CHECK-NEXT: vmov.s8 r2, q0[0]
936 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
937 ; CHECK-NEXT: asrs r1, r1, #31
938 ; CHECK-NEXT: asrs r2, r2, #31
939 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r1
940 ; CHECK-NEXT: vand q5, q6, q5
941 ; CHECK-NEXT: vmov r1, r12, d11
942 ; CHECK-NEXT: vmov r3, r2, d10
943 ; CHECK-NEXT: adds r1, r1, r3
944 ; CHECK-NEXT: ubfx r3, r0, #12, #1
945 ; CHECK-NEXT: ubfx r0, r0, #8, #1
946 ; CHECK-NEXT: rsb.w r3, r3, #0
947 ; CHECK-NEXT: rsb.w r0, r0, #0
948 ; CHECK-NEXT: adc.w r2, r2, r12
949 ; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
950 ; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
951 ; CHECK-NEXT: vmov.s8 r0, q0[3]
952 ; CHECK-NEXT: vmov.s8 r3, q0[2]
953 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r0
954 ; CHECK-NEXT: asrs r0, r0, #31
955 ; CHECK-NEXT: asrs r3, r3, #31
956 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r0
957 ; CHECK-NEXT: vand q5, q6, q5
958 ; CHECK-NEXT: vmov r0, r3, d10
959 ; CHECK-NEXT: adds r0, r0, r1
960 ; CHECK-NEXT: adc.w r1, r2, r3
961 ; CHECK-NEXT: vmov r2, r3, d11
962 ; CHECK-NEXT: adds.w r12, r0, r2
963 ; CHECK-NEXT: vmov.u16 r2, q4[6]
964 ; CHECK-NEXT: adcs r1, r3
965 ; CHECK-NEXT: vmov.u16 r3, q4[4]
966 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
967 ; CHECK-NEXT: vmov.u16 r2, q4[7]
968 ; CHECK-NEXT: vmov.u16 r3, q4[5]
969 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
970 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
971 ; CHECK-NEXT: vmrs r2, p0
972 ; CHECK-NEXT: and r0, r2, #1
973 ; CHECK-NEXT: ubfx r3, r2, #4, #1
974 ; CHECK-NEXT: rsbs r0, r0, #0
975 ; CHECK-NEXT: rsbs r3, r3, #0
976 ; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
977 ; CHECK-NEXT: vmov q4[3], q4[1], r0, r3
978 ; CHECK-NEXT: vmov.s8 r0, q0[5]
979 ; CHECK-NEXT: vmov.s8 r3, q0[4]
980 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r0
981 ; CHECK-NEXT: asrs r0, r0, #31
982 ; CHECK-NEXT: asrs r3, r3, #31
983 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r0
984 ; CHECK-NEXT: vand q4, q5, q4
985 ; CHECK-NEXT: vmov r0, r3, d8
986 ; CHECK-NEXT: adds.w r12, r12, r0
987 ; CHECK-NEXT: adcs r1, r3
988 ; CHECK-NEXT: vmov r3, r0, d9
989 ; CHECK-NEXT: adds.w r3, r3, r12
990 ; CHECK-NEXT: adcs r0, r1
991 ; CHECK-NEXT: ubfx r1, r2, #12, #1
992 ; CHECK-NEXT: ubfx r2, r2, #8, #1
993 ; CHECK-NEXT: rsbs r1, r1, #0
994 ; CHECK-NEXT: rsbs r2, r2, #0
995 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1
996 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r1
997 ; CHECK-NEXT: vmov.s8 r1, q0[7]
998 ; CHECK-NEXT: vmov.s8 r2, q0[6]
999 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r1
1000 ; CHECK-NEXT: asrs r1, r1, #31
1001 ; CHECK-NEXT: asrs r2, r2, #31
1002 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r1
1003 ; CHECK-NEXT: vand q4, q5, q4
1004 ; CHECK-NEXT: vmov r1, r2, d8
1005 ; CHECK-NEXT: adds r1, r1, r3
1006 ; CHECK-NEXT: adcs r2, r0
1007 ; CHECK-NEXT: vmov r0, r3, d9
1008 ; CHECK-NEXT: adds.w r12, r1, r0
1009 ; CHECK-NEXT: adc.w r1, r2, r3
1010 ; CHECK-NEXT: vmov.u8 r2, q3[8]
1011 ; CHECK-NEXT: vmov.16 q4[0], r2
1012 ; CHECK-NEXT: vmov.u8 r2, q3[9]
1013 ; CHECK-NEXT: vmov.16 q4[1], r2
1014 ; CHECK-NEXT: vmov.u8 r2, q3[10]
1015 ; CHECK-NEXT: vmov.16 q4[2], r2
1016 ; CHECK-NEXT: vmov.u8 r2, q3[11]
1017 ; CHECK-NEXT: vmov.16 q4[3], r2
1018 ; CHECK-NEXT: vmov.u8 r2, q3[12]
1019 ; CHECK-NEXT: vmov.16 q4[4], r2
1020 ; CHECK-NEXT: vmov.u8 r2, q3[13]
1021 ; CHECK-NEXT: vmov.16 q4[5], r2
1022 ; CHECK-NEXT: vmov.u8 r2, q3[14]
1023 ; CHECK-NEXT: vmov.16 q4[6], r2
1024 ; CHECK-NEXT: vmov.u8 r2, q3[15]
1025 ; CHECK-NEXT: vmov.16 q4[7], r2
1026 ; CHECK-NEXT: vcmp.i16 ne, q4, zr
1027 ; CHECK-NEXT: vpsel q1, q2, q1
1028 ; CHECK-NEXT: vmov.u16 r2, q1[2]
1029 ; CHECK-NEXT: vmov.u16 r3, q1[0]
1030 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1031 ; CHECK-NEXT: vmov.u16 r2, q1[3]
1032 ; CHECK-NEXT: vmov.u16 r3, q1[1]
1033 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
1034 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1035 ; CHECK-NEXT: vmrs r2, p0
1036 ; CHECK-NEXT: and r0, r2, #1
1037 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1038 ; CHECK-NEXT: rsbs r0, r0, #0
1039 ; CHECK-NEXT: rsbs r3, r3, #0
1040 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
1041 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
1042 ; CHECK-NEXT: vmov.s8 r0, q0[9]
1043 ; CHECK-NEXT: vmov.s8 r3, q0[8]
1044 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
1045 ; CHECK-NEXT: asrs r0, r0, #31
1046 ; CHECK-NEXT: asrs r3, r3, #31
1047 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r0
1048 ; CHECK-NEXT: vand q2, q3, q2
1049 ; CHECK-NEXT: vmov r0, r3, d4
1050 ; CHECK-NEXT: adds.w r12, r12, r0
1051 ; CHECK-NEXT: adcs r1, r3
1052 ; CHECK-NEXT: vmov r3, r0, d5
1053 ; CHECK-NEXT: adds.w r3, r3, r12
1054 ; CHECK-NEXT: adcs r0, r1
1055 ; CHECK-NEXT: ubfx r1, r2, #12, #1
1056 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1057 ; CHECK-NEXT: rsbs r1, r1, #0
1058 ; CHECK-NEXT: rsbs r2, r2, #0
1059 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
1060 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
1061 ; CHECK-NEXT: vmov.s8 r1, q0[11]
1062 ; CHECK-NEXT: vmov.s8 r2, q0[10]
1063 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
1064 ; CHECK-NEXT: asrs r1, r1, #31
1065 ; CHECK-NEXT: asrs r2, r2, #31
1066 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
1067 ; CHECK-NEXT: vand q2, q3, q2
1068 ; CHECK-NEXT: vmov r1, r2, d4
1069 ; CHECK-NEXT: adds r1, r1, r3
1070 ; CHECK-NEXT: adcs r2, r0
1071 ; CHECK-NEXT: vmov r0, r3, d5
1072 ; CHECK-NEXT: adds.w r12, r1, r0
1073 ; CHECK-NEXT: adc.w r1, r2, r3
1074 ; CHECK-NEXT: vmov.u16 r2, q1[6]
1075 ; CHECK-NEXT: vmov.u16 r3, q1[4]
1076 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1077 ; CHECK-NEXT: vmov.u16 r2, q1[7]
1078 ; CHECK-NEXT: vmov.u16 r3, q1[5]
1079 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
1080 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1081 ; CHECK-NEXT: vmrs r2, p0
1082 ; CHECK-NEXT: and r0, r2, #1
1083 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1084 ; CHECK-NEXT: rsbs r0, r0, #0
1085 ; CHECK-NEXT: rsbs r3, r3, #0
1086 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
1087 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
1088 ; CHECK-NEXT: vmov.s8 r0, q0[13]
1089 ; CHECK-NEXT: vmov.s8 r3, q0[12]
1090 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
1091 ; CHECK-NEXT: asrs r0, r0, #31
1092 ; CHECK-NEXT: asrs r3, r3, #31
1093 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r0
1094 ; CHECK-NEXT: vand q1, q2, q1
1095 ; CHECK-NEXT: vmov r0, r3, d2
1096 ; CHECK-NEXT: adds.w r12, r12, r0
1097 ; CHECK-NEXT: adcs r1, r3
1098 ; CHECK-NEXT: vmov r3, r0, d3
1099 ; CHECK-NEXT: adds.w r3, r3, r12
1100 ; CHECK-NEXT: adcs r0, r1
1101 ; CHECK-NEXT: ubfx r1, r2, #12, #1
1102 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1103 ; CHECK-NEXT: rsbs r1, r1, #0
1104 ; CHECK-NEXT: rsbs r2, r2, #0
1105 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
1106 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
1107 ; CHECK-NEXT: vmov.s8 r1, q0[15]
1108 ; CHECK-NEXT: vmov.s8 r2, q0[14]
1109 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
1110 ; CHECK-NEXT: asrs r1, r1, #31
1111 ; CHECK-NEXT: asrs r2, r2, #31
1112 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
1113 ; CHECK-NEXT: vand q0, q0, q1
1114 ; CHECK-NEXT: vmov r1, r2, d0
1115 ; CHECK-NEXT: adds r1, r1, r3
1116 ; CHECK-NEXT: adcs r2, r0
1117 ; CHECK-NEXT: vmov r0, r3, d1
1118 ; CHECK-NEXT: adds r0, r0, r1
1119 ; CHECK-NEXT: adc.w r1, r2, r3
1120 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
1123 %c = icmp eq <16 x i8> %b, zeroinitializer
1124 %xx = sext <16 x i8> %x to <16 x i64>
1125 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
1126 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction of <8 x i8> zero-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i64.
; Expected code: both operands are first widened to 16-bit lanes with
; vmovlb.u8, the comparison is done as vcmp.i16, and the predicate is read
; from p0 with vmrs to build two-lane 64-bit masks. Each pair of elements of
; %x is moved into the low words of a q register, masked with 0xffff per
; 64-bit lane (q1) and with the lane mask, then accumulated in r0/r1 and
; scratch core registers. d8/d9 are saved and restored around the body.
; The assertions are autogenerated; regenerate them rather than hand-editing.
1130 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %b) {
1131 ; CHECK-LABEL: add_v8i8_v8i64_zext:
1132 ; CHECK: @ %bb.0: @ %entry
1133 ; CHECK-NEXT: .vsave {d8, d9}
1134 ; CHECK-NEXT: vpush {d8, d9}
1135 ; CHECK-NEXT: vmovlb.u8 q1, q1
1136 ; CHECK-NEXT: vmov.i8 q2, #0xff
1137 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1138 ; CHECK-NEXT: vmov.i8 q1, #0x0
1139 ; CHECK-NEXT: vpsel q2, q2, q1
1140 ; CHECK-NEXT: vmovlb.u8 q0, q0
1141 ; CHECK-NEXT: vmov.u16 r0, q2[2]
1142 ; CHECK-NEXT: vmov.u16 r1, q2[0]
1143 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1144 ; CHECK-NEXT: vmov.u16 r0, q2[3]
1145 ; CHECK-NEXT: vmov.u16 r1, q2[1]
1146 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1147 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1148 ; CHECK-NEXT: vmov.i64 q1, #0xffff
1149 ; CHECK-NEXT: vmrs r0, p0
1150 ; CHECK-NEXT: and r2, r0, #1
1151 ; CHECK-NEXT: ubfx r1, r0, #4, #1
1152 ; CHECK-NEXT: rsbs r2, r2, #0
1153 ; CHECK-NEXT: rsbs r1, r1, #0
1154 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
1155 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
1156 ; CHECK-NEXT: vmov.u16 r1, q0[1]
1157 ; CHECK-NEXT: vmov.u16 r2, q0[0]
1158 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1
1159 ; CHECK-NEXT: vand q4, q4, q1
1160 ; CHECK-NEXT: vand q3, q4, q3
1161 ; CHECK-NEXT: vmov r12, r2, d7
1162 ; CHECK-NEXT: vmov r3, r1, d6
1163 ; CHECK-NEXT: orrs r1, r2
1164 ; CHECK-NEXT: add.w r2, r3, r12
1165 ; CHECK-NEXT: ubfx r3, r0, #12, #1
1166 ; CHECK-NEXT: ubfx r0, r0, #8, #1
1167 ; CHECK-NEXT: rsbs r3, r3, #0
1168 ; CHECK-NEXT: rsbs r0, r0, #0
1169 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
1170 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r3
1171 ; CHECK-NEXT: vmov.u16 r0, q0[3]
1172 ; CHECK-NEXT: vmov.u16 r3, q0[2]
1173 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0
1174 ; CHECK-NEXT: vand q4, q4, q1
1175 ; CHECK-NEXT: vand q3, q4, q3
1176 ; CHECK-NEXT: vmov r0, r3, d6
1177 ; CHECK-NEXT: adds r0, r0, r2
1178 ; CHECK-NEXT: adcs r1, r3
1179 ; CHECK-NEXT: vmov r2, r3, d7
1180 ; CHECK-NEXT: adds.w r12, r0, r2
1181 ; CHECK-NEXT: vmov.u16 r2, q2[6]
1182 ; CHECK-NEXT: adcs r1, r3
1183 ; CHECK-NEXT: vmov.u16 r3, q2[4]
1184 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
1185 ; CHECK-NEXT: vmov.u16 r2, q2[7]
1186 ; CHECK-NEXT: vmov.u16 r3, q2[5]
1187 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
1188 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1189 ; CHECK-NEXT: vmrs r2, p0
1190 ; CHECK-NEXT: and r0, r2, #1
1191 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1192 ; CHECK-NEXT: rsbs r0, r0, #0
1193 ; CHECK-NEXT: rsbs r3, r3, #0
1194 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
1195 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
1196 ; CHECK-NEXT: vmov.u16 r0, q0[5]
1197 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1198 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
1199 ; CHECK-NEXT: vand q3, q3, q1
1200 ; CHECK-NEXT: vand q2, q3, q2
1201 ; CHECK-NEXT: vmov r0, r3, d4
1202 ; CHECK-NEXT: adds.w r12, r12, r0
1203 ; CHECK-NEXT: adcs r1, r3
1204 ; CHECK-NEXT: vmov r3, r0, d5
1205 ; CHECK-NEXT: adds.w r3, r3, r12
1206 ; CHECK-NEXT: adcs r0, r1
1207 ; CHECK-NEXT: ubfx r1, r2, #12, #1
1208 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1209 ; CHECK-NEXT: rsbs r1, r1, #0
1210 ; CHECK-NEXT: rsbs r2, r2, #0
1211 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
1212 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
1213 ; CHECK-NEXT: vmov.u16 r1, q0[7]
1214 ; CHECK-NEXT: vmov.u16 r2, q0[6]
1215 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
1216 ; CHECK-NEXT: vand q0, q0, q1
1217 ; CHECK-NEXT: vand q0, q0, q2
1218 ; CHECK-NEXT: vmov r1, r2, d0
1219 ; CHECK-NEXT: adds r1, r1, r3
1220 ; CHECK-NEXT: adcs r2, r0
1221 ; CHECK-NEXT: vmov r0, r3, d1
1222 ; CHECK-NEXT: adds r0, r0, r1
1223 ; CHECK-NEXT: adc.w r1, r2, r3
1224 ; CHECK-NEXT: vpop {d8, d9}
1227 %c = icmp eq <8 x i8> %b, zeroinitializer
1228 %xx = zext <8 x i8> %x to <8 x i64>
1229 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1230 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction of <8 x i8> sign-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i64.
; Expected code: %b is widened with vmovlb.u8 and compared as vcmp.i16; the
; predicate is read with vmrs and expanded into two-lane 64-bit masks. Each
; element of %x is extracted with vmov.u16, sign-extended from 8 bits with
; sxtb, and its high word is produced with asrs #31 before masking (vand) and
; accumulating with adds/adc pairs. No callee-saved registers are pushed in
; the expected output.
; The assertions are autogenerated; regenerate them rather than hand-editing.
1234 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %b) {
1235 ; CHECK-LABEL: add_v8i8_v8i64_sext:
1236 ; CHECK: @ %bb.0: @ %entry
1237 ; CHECK-NEXT: vmovlb.u8 q1, q1
1238 ; CHECK-NEXT: vmov.i8 q2, #0xff
1239 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1240 ; CHECK-NEXT: vmov.i8 q1, #0x0
1241 ; CHECK-NEXT: vpsel q1, q2, q1
1242 ; CHECK-NEXT: vmov.u16 r0, q1[2]
1243 ; CHECK-NEXT: vmov.u16 r1, q1[0]
1244 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
1245 ; CHECK-NEXT: vmov.u16 r0, q1[3]
1246 ; CHECK-NEXT: vmov.u16 r1, q1[1]
1247 ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
1248 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1249 ; CHECK-NEXT: vmrs r0, p0
1250 ; CHECK-NEXT: and r2, r0, #1
1251 ; CHECK-NEXT: ubfx r1, r0, #4, #1
1252 ; CHECK-NEXT: rsbs r2, r2, #0
1253 ; CHECK-NEXT: rsbs r1, r1, #0
1254 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
1255 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
1256 ; CHECK-NEXT: vmov.u16 r1, q0[1]
1257 ; CHECK-NEXT: vmov.u16 r2, q0[0]
1258 ; CHECK-NEXT: sxtb r1, r1
1259 ; CHECK-NEXT: sxtb r2, r2
1260 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
1261 ; CHECK-NEXT: asrs r1, r1, #31
1262 ; CHECK-NEXT: asrs r2, r2, #31
1263 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
1264 ; CHECK-NEXT: vand q2, q3, q2
1265 ; CHECK-NEXT: vmov r1, r12, d5
1266 ; CHECK-NEXT: vmov r3, r2, d4
1267 ; CHECK-NEXT: adds r1, r1, r3
1268 ; CHECK-NEXT: ubfx r3, r0, #12, #1
1269 ; CHECK-NEXT: ubfx r0, r0, #8, #1
1270 ; CHECK-NEXT: rsb.w r3, r3, #0
1271 ; CHECK-NEXT: rsb.w r0, r0, #0
1272 ; CHECK-NEXT: adc.w r2, r2, r12
1273 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
1274 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
1275 ; CHECK-NEXT: vmov.u16 r0, q0[3]
1276 ; CHECK-NEXT: vmov.u16 r3, q0[2]
1277 ; CHECK-NEXT: sxtb r0, r0
1278 ; CHECK-NEXT: sxtb r3, r3
1279 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
1280 ; CHECK-NEXT: asrs r0, r0, #31
1281 ; CHECK-NEXT: asrs r3, r3, #31
1282 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r0
1283 ; CHECK-NEXT: vand q2, q3, q2
1284 ; CHECK-NEXT: vmov r0, r3, d4
1285 ; CHECK-NEXT: adds r0, r0, r1
1286 ; CHECK-NEXT: adc.w r1, r2, r3
1287 ; CHECK-NEXT: vmov r2, r3, d5
1288 ; CHECK-NEXT: adds.w r12, r0, r2
1289 ; CHECK-NEXT: vmov.u16 r2, q1[6]
1290 ; CHECK-NEXT: adcs r1, r3
1291 ; CHECK-NEXT: vmov.u16 r3, q1[4]
1292 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1293 ; CHECK-NEXT: vmov.u16 r2, q1[7]
1294 ; CHECK-NEXT: vmov.u16 r3, q1[5]
1295 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
1296 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1297 ; CHECK-NEXT: vmrs r2, p0
1298 ; CHECK-NEXT: and r0, r2, #1
1299 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1300 ; CHECK-NEXT: rsbs r0, r0, #0
1301 ; CHECK-NEXT: rsbs r3, r3, #0
1302 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
1303 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
1304 ; CHECK-NEXT: vmov.u16 r0, q0[5]
1305 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1306 ; CHECK-NEXT: sxtb r0, r0
1307 ; CHECK-NEXT: sxtb r3, r3
1308 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
1309 ; CHECK-NEXT: asrs r0, r0, #31
1310 ; CHECK-NEXT: asrs r3, r3, #31
1311 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r0
1312 ; CHECK-NEXT: vand q1, q2, q1
1313 ; CHECK-NEXT: vmov r0, r3, d2
1314 ; CHECK-NEXT: adds.w r12, r12, r0
1315 ; CHECK-NEXT: adcs r1, r3
1316 ; CHECK-NEXT: vmov r3, r0, d3
1317 ; CHECK-NEXT: adds.w r3, r3, r12
1318 ; CHECK-NEXT: adcs r0, r1
1319 ; CHECK-NEXT: ubfx r1, r2, #12, #1
1320 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1321 ; CHECK-NEXT: rsbs r1, r1, #0
1322 ; CHECK-NEXT: rsbs r2, r2, #0
1323 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
1324 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
1325 ; CHECK-NEXT: vmov.u16 r1, q0[7]
1326 ; CHECK-NEXT: vmov.u16 r2, q0[6]
1327 ; CHECK-NEXT: sxtb r1, r1
1328 ; CHECK-NEXT: sxtb r2, r2
1329 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
1330 ; CHECK-NEXT: asrs r1, r1, #31
1331 ; CHECK-NEXT: asrs r2, r2, #31
1332 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
1333 ; CHECK-NEXT: vand q0, q0, q1
1334 ; CHECK-NEXT: vmov r1, r2, d0
1335 ; CHECK-NEXT: adds r1, r1, r3
1336 ; CHECK-NEXT: adcs r2, r0
1337 ; CHECK-NEXT: vmov r0, r3, d1
1338 ; CHECK-NEXT: adds r0, r0, r1
1339 ; CHECK-NEXT: adc.w r1, r2, r3
1342 %c = icmp eq <8 x i8> %b, zeroinitializer
1343 %xx = sext <8 x i8> %x to <8 x i64>
1344 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1345 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction of <4 x i8> zero-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i64.
; Expected code: both operands are masked to their low 8 bits per 32-bit lane
; (vand with #0xff), then a vpt.i32 block predicates a single vaddlvt.u32
; that produces the 64-bit result in r0/r1.
1349 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %b) {
1350 ; CHECK-LABEL: add_v4i8_v4i64_zext:
1351 ; CHECK: @ %bb.0: @ %entry
1352 ; CHECK-NEXT: vmov.i32 q2, #0xff
1353 ; CHECK-NEXT: vand q0, q0, q2
1354 ; CHECK-NEXT: vand q1, q1, q2
1355 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1356 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
1359 %c = icmp eq <4 x i8> %b, zeroinitializer
1360 %xx = zext <4 x i8> %x to <4 x i64>
1361 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1362 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction of <4 x i8> sign-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i64.
; Expected code: %x is sign-extended in two steps (vmovlb.s8, vmovlb.s16),
; %b is masked to its low 8 bits per 32-bit lane for the comparison, and a
; vpt.i32 block predicates a single vaddlvt.s32 into r0/r1.
1366 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %b) {
1367 ; CHECK-LABEL: add_v4i8_v4i64_sext:
1368 ; CHECK: @ %bb.0: @ %entry
1369 ; CHECK-NEXT: vmovlb.s8 q0, q0
1370 ; CHECK-NEXT: vmov.i32 q2, #0xff
1371 ; CHECK-NEXT: vand q1, q1, q2
1372 ; CHECK-NEXT: vmovlb.s16 q0, q0
1373 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1374 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
1377 %c = icmp eq <4 x i8> %b, zeroinitializer
1378 %xx = sext <4 x i8> %x to <4 x i64>
1379 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1380 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction of <2 x i8> zero-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v2i64.
; Expected code: both operands are masked with #0xff per 64-bit lane, the two
; lanes of %b are compared in core registers (cmp/cset/csetm) to build an
; all-ones/all-zeros lane mask, and the masked lanes are combined with a
; plain add on the low words and orrs on the high words. Both lanes were
; masked to 8 bits first, so the low-word add cannot carry into the high word.
1384 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
1385 ; CHECK-LABEL: add_v2i8_v2i64_zext:
1386 ; CHECK: @ %bb.0: @ %entry
1387 ; CHECK-NEXT: vmov.i64 q2, #0xff
1388 ; CHECK-NEXT: vand q1, q1, q2
1389 ; CHECK-NEXT: vand q0, q0, q2
1390 ; CHECK-NEXT: vmov r0, s6
1391 ; CHECK-NEXT: vmov r1, s4
1392 ; CHECK-NEXT: cmp r0, #0
1393 ; CHECK-NEXT: cset r0, eq
1394 ; CHECK-NEXT: cmp r0, #0
1395 ; CHECK-NEXT: csetm r0, ne
1396 ; CHECK-NEXT: cmp r1, #0
1397 ; CHECK-NEXT: cset r1, eq
1398 ; CHECK-NEXT: cmp r1, #0
1399 ; CHECK-NEXT: csetm r1, ne
1400 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1401 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1402 ; CHECK-NEXT: vand q0, q0, q1
1403 ; CHECK-NEXT: vmov r0, r1, d1
1404 ; CHECK-NEXT: vmov r2, r3, d0
1405 ; CHECK-NEXT: add r0, r2
1406 ; CHECK-NEXT: orrs r1, r3
1409 %c = icmp eq <2 x i8> %b, zeroinitializer
1410 %xx = zext <2 x i8> %x to <2 x i64>
1411 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1412 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction of <2 x i8> sign-extended to i64 lanes.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v2i64.
; Expected code: %b is masked with #0xff and its two lanes are compared in
; core registers (cmp/cset/csetm) to build the lane mask. The two elements of
; %x are sign-extended with sxtb, their high words are produced with
; asrs #31, and the masked lanes are summed with adds/adcs.
1416 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %b) {
1417 ; CHECK-LABEL: add_v2i8_v2i64_sext:
1418 ; CHECK: @ %bb.0: @ %entry
1419 ; CHECK-NEXT: vmov.i32 q2, #0xff
1420 ; CHECK-NEXT: vand q1, q1, q2
1421 ; CHECK-NEXT: vmov r0, s6
1422 ; CHECK-NEXT: vmov r1, s4
1423 ; CHECK-NEXT: cmp r0, #0
1424 ; CHECK-NEXT: cset r0, eq
1425 ; CHECK-NEXT: cmp r0, #0
1426 ; CHECK-NEXT: csetm r0, ne
1427 ; CHECK-NEXT: cmp r1, #0
1428 ; CHECK-NEXT: cset r1, eq
1429 ; CHECK-NEXT: cmp r1, #0
1430 ; CHECK-NEXT: csetm r1, ne
1431 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1432 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1433 ; CHECK-NEXT: vmov r0, s2
1434 ; CHECK-NEXT: vmov r1, s0
1435 ; CHECK-NEXT: sxtb r0, r0
1436 ; CHECK-NEXT: sxtb r1, r1
1437 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
1438 ; CHECK-NEXT: asrs r0, r0, #31
1439 ; CHECK-NEXT: asrs r1, r1, #31
1440 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
1441 ; CHECK-NEXT: vand q0, q0, q1
1442 ; CHECK-NEXT: vmov r0, r1, d1
1443 ; CHECK-NEXT: vmov r2, r3, d0
1444 ; CHECK-NEXT: adds r0, r0, r2
1445 ; CHECK-NEXT: adcs r1, r3
1448 %c = icmp eq <2 x i8> %b, zeroinitializer
1449 %xx = sext <2 x i8> %x to <2 x i64>
1450 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1451 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction of <2 x i64> with no extension step.
; Lanes of %x are kept where the matching 64-bit lane of %b equals zero and
; replaced by zero elsewhere; the result is summed with
; llvm.vector.reduce.add.v2i64.
; Expected code: each 64-bit lane of %b is tested by OR-ing its two halves
; (orrs), the result is converted to an all-ones/all-zeros mask with
; cset/csetm, %x is masked with vand, and the two lanes are summed with
; adds/adcs.
1455 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %b) {
1456 ; CHECK-LABEL: add_v2i64_v2i64:
1457 ; CHECK: @ %bb.0: @ %entry
1458 ; CHECK-NEXT: vmov r0, r1, d3
1459 ; CHECK-NEXT: orrs r0, r1
1460 ; CHECK-NEXT: vmov r1, r2, d2
1461 ; CHECK-NEXT: cset r0, eq
1462 ; CHECK-NEXT: cmp r0, #0
1463 ; CHECK-NEXT: csetm r0, ne
1464 ; CHECK-NEXT: orrs r1, r2
1465 ; CHECK-NEXT: cset r1, eq
1466 ; CHECK-NEXT: cmp r1, #0
1467 ; CHECK-NEXT: csetm r1, ne
1468 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1469 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1470 ; CHECK-NEXT: vand q0, q0, q1
1471 ; CHECK-NEXT: vmov r0, r1, d1
1472 ; CHECK-NEXT: vmov r2, r3, d0
1473 ; CHECK-NEXT: adds r0, r0, r2
1474 ; CHECK-NEXT: adcs r1, r3
1477 %c = icmp eq <2 x i64> %b, zeroinitializer
1478 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
1479 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Accumulating variant: predicated add reduction of <4 x i32>, with an extra
; scalar i32 parameter %a in the signature.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i32.
; Expected code: a vpt.i32 block predicating a single vaddvat.u32 on r0 (the
; "a" in the mnemonic is the accumulating form of the reduction).
1483 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %b, i32 %a) {
1484 ; CHECK-LABEL: add_v4i32_v4i32_acc:
1485 ; CHECK: @ %bb.0: @ %entry
1486 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1487 ; CHECK-NEXT: vaddvat.u32 r0, q0
1490 %c = icmp eq <4 x i32> %b, zeroinitializer
1491 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1492 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Accumulating variant: predicated add reduction of <4 x i32> zero-extended to
; i64 lanes, with an extra scalar i64 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i64.
; Expected code: a vpt.i32 block predicating a single unsigned accumulating
; long reduction, vaddlvat.u32, on the r0/r1 pair.
1497 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1498 ; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
1499 ; CHECK: @ %bb.0: @ %entry
1500 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1501 ; CHECK-NEXT: vaddlvat.u32 r0, r1, q0
1504 %c = icmp eq <4 x i32> %b, zeroinitializer
1505 %xx = zext <4 x i32> %x to <4 x i64>
1506 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1507 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Accumulating variant: predicated add reduction of <4 x i32> sign-extended to
; i64 lanes, with an extra scalar i64 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i64.
; Expected code: a vpt.i32 block predicating a single signed accumulating
; long reduction, vaddlvat.s32, on the r0/r1 pair.
1512 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1513 ; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
1514 ; CHECK: @ %bb.0: @ %entry
1515 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1516 ; CHECK-NEXT: vaddlvat.s32 r0, r1, q0
1519 %c = icmp eq <4 x i32> %b, zeroinitializer
1520 %xx = sext <4 x i32> %x to <4 x i64>
1521 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1522 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Accumulating variant: predicated add reduction of <2 x i32> zero-extended to
; i64 lanes, with an extra scalar i64 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v2i64.
; Expected code: %x is masked with #0xffffffff per 64-bit lane, the two lanes
; of %b are compared in core registers (cmp/cset/csetm) to build the lane
; mask, the masked lanes are summed with adds.w/adc.w, and that sum is then
; added into r0/r1 with a final adds/adcs. lr is used as a scratch register,
; which is why r7/lr are pushed and popped.
1527 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1528 ; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
1529 ; CHECK: @ %bb.0: @ %entry
1530 ; CHECK-NEXT: .save {r7, lr}
1531 ; CHECK-NEXT: push {r7, lr}
1532 ; CHECK-NEXT: vmov r2, s6
1533 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
1534 ; CHECK-NEXT: vmov r3, s4
1535 ; CHECK-NEXT: vand q0, q0, q2
1536 ; CHECK-NEXT: cmp r2, #0
1537 ; CHECK-NEXT: cset r2, eq
1538 ; CHECK-NEXT: cmp r2, #0
1539 ; CHECK-NEXT: csetm r2, ne
1540 ; CHECK-NEXT: cmp r3, #0
1541 ; CHECK-NEXT: cset r3, eq
1542 ; CHECK-NEXT: cmp r3, #0
1543 ; CHECK-NEXT: csetm r3, ne
1544 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1545 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1546 ; CHECK-NEXT: vand q0, q0, q1
1547 ; CHECK-NEXT: vmov lr, r12, d1
1548 ; CHECK-NEXT: vmov r3, r2, d0
1549 ; CHECK-NEXT: adds.w r3, r3, lr
1550 ; CHECK-NEXT: adc.w r2, r2, r12
1551 ; CHECK-NEXT: adds r0, r0, r3
1552 ; CHECK-NEXT: adcs r1, r2
1553 ; CHECK-NEXT: pop {r7, pc}
1555 %c = icmp eq <2 x i32> %b, zeroinitializer
1556 %xx = zext <2 x i32> %x to <2 x i64>
1557 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1558 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Accumulating variant: predicated add reduction of <2 x i32> sign-extended to
; i64 lanes, with an extra scalar i64 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v2i64.
; Expected code: the two elements of %x are rebuilt as 64-bit lanes with
; their high words produced by asrs #31, the two lanes of %b are compared in
; core registers (cmp/cset/csetm) to build the lane mask, the masked lanes
; are summed with adds.w/adc.w, and that sum is then added into r0/r1 with a
; final adds/adcs. lr is used as a scratch register, which is why r7/lr are
; pushed and popped.
1563 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1564 ; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
1565 ; CHECK: @ %bb.0: @ %entry
1566 ; CHECK-NEXT: .save {r7, lr}
1567 ; CHECK-NEXT: push {r7, lr}
1568 ; CHECK-NEXT: vmov r2, s2
1569 ; CHECK-NEXT: vmov r3, s0
1570 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1571 ; CHECK-NEXT: asrs r2, r2, #31
1572 ; CHECK-NEXT: asrs r3, r3, #31
1573 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1574 ; CHECK-NEXT: vmov r2, s6
1575 ; CHECK-NEXT: vmov r3, s4
1576 ; CHECK-NEXT: cmp r2, #0
1577 ; CHECK-NEXT: cset r2, eq
1578 ; CHECK-NEXT: cmp r2, #0
1579 ; CHECK-NEXT: csetm r2, ne
1580 ; CHECK-NEXT: cmp r3, #0
1581 ; CHECK-NEXT: cset r3, eq
1582 ; CHECK-NEXT: cmp r3, #0
1583 ; CHECK-NEXT: csetm r3, ne
1584 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1585 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1586 ; CHECK-NEXT: vand q0, q0, q1
1587 ; CHECK-NEXT: vmov lr, r12, d1
1588 ; CHECK-NEXT: vmov r3, r2, d0
1589 ; CHECK-NEXT: adds.w r3, r3, lr
1590 ; CHECK-NEXT: adc.w r2, r2, r12
1591 ; CHECK-NEXT: adds r0, r0, r3
1592 ; CHECK-NEXT: adcs r1, r2
1593 ; CHECK-NEXT: pop {r7, pc}
1595 %c = icmp eq <2 x i32> %b, zeroinitializer
1596 %xx = sext <2 x i32> %x to <2 x i64>
1597 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1598 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Accumulating variant: predicated add reduction of <8 x i16> zero-extended to
; i32 lanes, with an extra scalar i32 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i32.
; Expected code: the extension is folded into the reduction, leaving a
; vpt.i16 block that predicates a single unsigned vaddvat.u16 on r0.
1603 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1604 ; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
1605 ; CHECK: @ %bb.0: @ %entry
1606 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1607 ; CHECK-NEXT: vaddvat.u16 r0, q0
1610 %c = icmp eq <8 x i16> %b, zeroinitializer
1611 %xx = zext <8 x i16> %x to <8 x i32>
1612 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1613 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Accumulating variant: predicated add reduction of <8 x i16> sign-extended to
; i32 lanes, with an extra scalar i32 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i32.
; Expected code: the extension is folded into the reduction, leaving a
; vpt.i16 block that predicates a single signed vaddvat.s16 on r0.
1618 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1619 ; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
1620 ; CHECK: @ %bb.0: @ %entry
1621 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1622 ; CHECK-NEXT: vaddvat.s16 r0, q0
1625 %c = icmp eq <8 x i16> %b, zeroinitializer
1626 %xx = sext <8 x i16> %x to <8 x i32>
1627 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1628 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Accumulating variant: predicated add reduction of <4 x i16> zero-extended to
; i32 lanes, with an extra scalar i32 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i32.
; Expected code: both operands are widened with vmovlb.u16, then a vpt.i32
; block predicates a single vaddvat.u32 on r0.
1633 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1634 ; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
1635 ; CHECK: @ %bb.0: @ %entry
1636 ; CHECK-NEXT: vmovlb.u16 q0, q0
1637 ; CHECK-NEXT: vmovlb.u16 q1, q1
1638 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1639 ; CHECK-NEXT: vaddvat.u32 r0, q0
1642 %c = icmp eq <4 x i16> %b, zeroinitializer
1643 %xx = zext <4 x i16> %x to <4 x i32>
1644 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1645 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Accumulating variant: predicated add reduction of <4 x i16> sign-extended to
; i32 lanes, with an extra scalar i32 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v4i32.
; Expected code: %x is sign-extended with vmovlb.s16 while %b is widened with
; vmovlb.u16 for the comparison, then a vpt.i32 block predicates a single
; vaddvat.u32 on r0. The .u32 form is expected even for this signed case,
; since the lanes are already 32 bits wide when they are summed.
1650 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1651 ; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
1652 ; CHECK: @ %bb.0: @ %entry
1653 ; CHECK-NEXT: vmovlb.s16 q0, q0
1654 ; CHECK-NEXT: vmovlb.u16 q1, q1
1655 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1656 ; CHECK-NEXT: vaddvat.u32 r0, q0
1659 %c = icmp eq <4 x i16> %b, zeroinitializer
1660 %xx = sext <4 x i16> %x to <4 x i32>
1661 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1662 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Accumulating variant: predicated add reduction of <8 x i16> with no
; extension, an extra scalar i16 parameter %a, and a zeroext i16 return.
; Lanes of %x are kept where the matching lane of %b equals zero and replaced
; by zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i16.
; Expected code: a vpt.i16 block predicating a single vaddvat.u16 on r0,
; followed by uxth to zero-extend the 16-bit result for the zeroext return.
1667 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> %b, i16 %a) {
1668 ; CHECK-LABEL: add_v8i16_v8i16_acc:
1669 ; CHECK: @ %bb.0: @ %entry
1670 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1671 ; CHECK-NEXT: vaddvat.u16 r0, q0
1672 ; CHECK-NEXT: uxth r0, r0
1675 %c = icmp eq <8 x i16> %b, zeroinitializer
1676 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1677 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Accumulating variant: predicated add reduction of <8 x i16> zero-extended to
; i64 lanes, with an extra scalar i64 parameter %a in the signature.
; Lanes are kept where the matching lane of %b equals zero and replaced by
; zero elsewhere; the result is summed with llvm.vector.reduce.add.v8i64.
; Expected code: no single predicated reduction instruction is used. The
; 16-bit lane mask from vcmp.i16/vpsel is narrowed to 32-bit lanes, read from
; p0 with vmrs and expanded into two-lane 64-bit masks (predicate bits 0/4
; and 8/12). Each pair of elements of %x is masked with 0xffff per 64-bit
; lane (q1) and with the lane mask, and the running 64-bit sum is kept in
; core registers; a final adds/adcs adds it into r0/r1.
; r4/lr and d8/d9 are saved on entry and restored before returning.
; The assertions are autogenerated; regenerate them rather than hand-editing.
1682 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1683 ; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
1684 ; CHECK: @ %bb.0: @ %entry
1685 ; CHECK-NEXT: .save {r4, lr}
1686 ; CHECK-NEXT: push {r4, lr}
1687 ; CHECK-NEXT: .vsave {d8, d9}
1688 ; CHECK-NEXT: vpush {d8, d9}
1689 ; CHECK-NEXT: vmov.i8 q2, #0x0
1690 ; CHECK-NEXT: vmov.i8 q3, #0xff
1691 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1692 ; CHECK-NEXT: vpsel q2, q3, q2
1693 ; CHECK-NEXT: vmov.u16 r2, q2[2]
1694 ; CHECK-NEXT: vmov.u16 r3, q2[0]
1695 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1696 ; CHECK-NEXT: vmov.u16 r2, q2[3]
1697 ; CHECK-NEXT: vmov.u16 r3, q2[1]
1698 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1699 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1700 ; CHECK-NEXT: vmov.i64 q1, #0xffff
1701 ; CHECK-NEXT: vmrs r2, p0
1702 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1703 ; CHECK-NEXT: rsb.w r12, r3, #0
1704 ; CHECK-NEXT: and r3, r2, #1
1705 ; CHECK-NEXT: rsbs r3, r3, #0
1706 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r12
1707 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r12
1708 ; CHECK-NEXT: vmov.u16 r12, q0[1]
1709 ; CHECK-NEXT: vmov.u16 r3, q0[0]
1710 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r12
1711 ; CHECK-NEXT: vand q4, q4, q1
1712 ; CHECK-NEXT: vand q3, q4, q3
1713 ; CHECK-NEXT: vmov r12, lr, d7
1714 ; CHECK-NEXT: vmov r3, r4, d6
1715 ; CHECK-NEXT: orr.w lr, lr, r4
1716 ; CHECK-NEXT: ubfx r4, r2, #12, #1
1717 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1718 ; CHECK-NEXT: rsbs r4, r4, #0
1719 ; CHECK-NEXT: rsbs r2, r2, #0
1720 ; CHECK-NEXT: add r3, r12
1721 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r4
1722 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r4
1723 ; CHECK-NEXT: vmov.u16 r2, q0[3]
1724 ; CHECK-NEXT: vmov.u16 r4, q0[2]
1725 ; CHECK-NEXT: vmov q4[2], q4[0], r4, r2
1726 ; CHECK-NEXT: vand q4, q4, q1
1727 ; CHECK-NEXT: vand q3, q4, q3
1728 ; CHECK-NEXT: vmov r2, r4, d6
1729 ; CHECK-NEXT: adds.w r12, r3, r2
1730 ; CHECK-NEXT: adc.w r3, lr, r4
1731 ; CHECK-NEXT: vmov r4, r2, d7
1732 ; CHECK-NEXT: adds.w lr, r12, r4
1733 ; CHECK-NEXT: adc.w r12, r3, r2
1734 ; CHECK-NEXT: vmov.u16 r3, q2[6]
1735 ; CHECK-NEXT: vmov.u16 r2, q2[4]
1736 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1737 ; CHECK-NEXT: vmov.u16 r2, q2[7]
1738 ; CHECK-NEXT: vmov.u16 r3, q2[5]
1739 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
1740 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1741 ; CHECK-NEXT: vmrs r2, p0
1742 ; CHECK-NEXT: and r4, r2, #1
1743 ; CHECK-NEXT: ubfx r3, r2, #4, #1
1744 ; CHECK-NEXT: rsbs r4, r4, #0
1745 ; CHECK-NEXT: rsbs r3, r3, #0
1746 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r3
1747 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r3
1748 ; CHECK-NEXT: vmov.u16 r3, q0[5]
1749 ; CHECK-NEXT: vmov.u16 r4, q0[4]
1750 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
1751 ; CHECK-NEXT: vand q3, q3, q1
1752 ; CHECK-NEXT: vand q2, q3, q2
1753 ; CHECK-NEXT: vmov r3, r4, d4
1754 ; CHECK-NEXT: adds.w lr, lr, r3
1755 ; CHECK-NEXT: adc.w r12, r12, r4
1756 ; CHECK-NEXT: vmov r3, r4, d5
1757 ; CHECK-NEXT: adds.w lr, lr, r3
1758 ; CHECK-NEXT: ubfx r3, r2, #12, #1
1759 ; CHECK-NEXT: ubfx r2, r2, #8, #1
1760 ; CHECK-NEXT: rsb.w r3, r3, #0
1761 ; CHECK-NEXT: rsb.w r2, r2, #0
1762 ; CHECK-NEXT: adc.w r4, r4, r12
1763 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1764 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
1765 ; CHECK-NEXT: vmov.u16 r2, q0[7]
1766 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1767 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1768 ; CHECK-NEXT: vand q0, q0, q1
1769 ; CHECK-NEXT: vand q0, q0, q2
1770 ; CHECK-NEXT: vmov r2, r3, d0
1771 ; CHECK-NEXT: adds.w r12, lr, r2
1772 ; CHECK-NEXT: adcs r3, r4
1773 ; CHECK-NEXT: vmov r4, r2, d1
1774 ; CHECK-NEXT: adds.w r4, r4, r12
1775 ; CHECK-NEXT: adcs r2, r3
1776 ; CHECK-NEXT: adds r0, r0, r4
1777 ; CHECK-NEXT: adcs r1, r2
1778 ; CHECK-NEXT: vpop {d8, d9}
1779 ; CHECK-NEXT: pop {r4, pc}
1781 %c = icmp eq <8 x i16> %b, zeroinitializer
1782 %xx = zext <8 x i16> %x to <8 x i64>
1783 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1784 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction with accumulate: <8 x i16> sign-extended to
; <8 x i64>. Lanes of %x are kept where the matching lane of %b equals zero and
; replaced by zero otherwise, then summed with llvm.vector.reduce.add.v8i64.
; The expected output is a fully expanded scalar sequence: the i16 predicate is
; widened to i32 predicates in two halves, each pair of lanes is extracted with
; vmov.s16 (high words built with asrs #31), masked with vand, and folded into
; a 64-bit running sum with adds/adc chains that ends by adding into r0/r1.
1789 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1790 ; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
1791 ; CHECK: @ %bb.0: @ %entry
1792 ; CHECK-NEXT: .save {r4, r5, r7, lr}
1793 ; CHECK-NEXT: push {r4, r5, r7, lr}
1794 ; CHECK-NEXT: vmov.i8 q2, #0x0
1795 ; CHECK-NEXT: vmov.i8 q3, #0xff
1796 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1797 ; CHECK-NEXT: vpsel q1, q3, q2
1798 ; CHECK-NEXT: vmov.u16 r2, q1[2]
1799 ; CHECK-NEXT: vmov.u16 r3, q1[0]
1800 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1801 ; CHECK-NEXT: vmov.u16 r2, q1[3]
1802 ; CHECK-NEXT: vmov.u16 r3, q1[1]
1803 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
1804 ; CHECK-NEXT: vmov.s16 r2, q0[0]
1805 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1806 ; CHECK-NEXT: vmrs lr, p0
1807 ; CHECK-NEXT: ubfx r3, lr, #4, #1
1808 ; CHECK-NEXT: rsb.w r12, r3, #0
1809 ; CHECK-NEXT: and r3, lr, #1
1810 ; CHECK-NEXT: rsbs r3, r3, #0
1811 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r12
1812 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r12
1813 ; CHECK-NEXT: vmov.s16 r3, q0[1]
1814 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1815 ; CHECK-NEXT: asrs r3, r3, #31
1816 ; CHECK-NEXT: asrs r2, r2, #31
1817 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
1818 ; CHECK-NEXT: vand q2, q3, q2
1819 ; CHECK-NEXT: vmov r2, r12, d5
1820 ; CHECK-NEXT: vmov r3, r4, d4
1821 ; CHECK-NEXT: adds r5, r3, r2
1822 ; CHECK-NEXT: ubfx r2, lr, #8, #1
1823 ; CHECK-NEXT: adc.w r3, r4, r12
1824 ; CHECK-NEXT: ubfx r4, lr, #12, #1
1825 ; CHECK-NEXT: rsbs r4, r4, #0
1826 ; CHECK-NEXT: rsbs r2, r2, #0
1827 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
1828 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
1829 ; CHECK-NEXT: vmov.s16 r2, q0[3]
1830 ; CHECK-NEXT: vmov.s16 r4, q0[2]
1831 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
1832 ; CHECK-NEXT: asrs r2, r2, #31
1833 ; CHECK-NEXT: asrs r4, r4, #31
1834 ; CHECK-NEXT: vmov q3[3], q3[1], r4, r2
1835 ; CHECK-NEXT: vand q2, q3, q2
1836 ; CHECK-NEXT: vmov r2, r4, d4
1837 ; CHECK-NEXT: adds r2, r2, r5
1838 ; CHECK-NEXT: adcs r3, r4
1839 ; CHECK-NEXT: vmov r5, r4, d5
1840 ; CHECK-NEXT: adds.w r12, r2, r5
1841 ; CHECK-NEXT: vmov.u16 r5, q1[6]
1842 ; CHECK-NEXT: adcs r3, r4
1843 ; CHECK-NEXT: vmov.u16 r4, q1[4]
1844 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r5
1845 ; CHECK-NEXT: vmov.u16 r5, q1[7]
1846 ; CHECK-NEXT: vmov.u16 r4, q1[5]
1847 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r5
1848 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1849 ; CHECK-NEXT: vmrs r5, p0
1850 ; CHECK-NEXT: and r2, r5, #1
1851 ; CHECK-NEXT: ubfx r4, r5, #4, #1
1852 ; CHECK-NEXT: rsbs r2, r2, #0
1853 ; CHECK-NEXT: rsbs r4, r4, #0
1854 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
1855 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
1856 ; CHECK-NEXT: vmov.s16 r2, q0[5]
1857 ; CHECK-NEXT: vmov.s16 r4, q0[4]
1858 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
1859 ; CHECK-NEXT: asrs r2, r2, #31
1860 ; CHECK-NEXT: asrs r4, r4, #31
1861 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r2
1862 ; CHECK-NEXT: vand q1, q2, q1
1863 ; CHECK-NEXT: vmov r2, r4, d2
1864 ; CHECK-NEXT: adds.w r12, r12, r2
1865 ; CHECK-NEXT: adcs r3, r4
1866 ; CHECK-NEXT: vmov r4, r2, d3
1867 ; CHECK-NEXT: adds.w r4, r4, r12
1868 ; CHECK-NEXT: adcs r2, r3
1869 ; CHECK-NEXT: ubfx r3, r5, #12, #1
1870 ; CHECK-NEXT: ubfx r5, r5, #8, #1
1871 ; CHECK-NEXT: rsbs r3, r3, #0
1872 ; CHECK-NEXT: rsbs r5, r5, #0
1873 ; CHECK-NEXT: vmov q1[2], q1[0], r5, r3
1874 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r3
1875 ; CHECK-NEXT: vmov.s16 r3, q0[7]
1876 ; CHECK-NEXT: vmov.s16 r5, q0[6]
1877 ; CHECK-NEXT: vmov q0[2], q0[0], r5, r3
1878 ; CHECK-NEXT: asrs r3, r3, #31
1879 ; CHECK-NEXT: asrs r5, r5, #31
1880 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
1881 ; CHECK-NEXT: vand q0, q0, q1
1882 ; CHECK-NEXT: vmov r3, r5, d0
1883 ; CHECK-NEXT: adds r3, r3, r4
1884 ; CHECK-NEXT: adcs r2, r5
1885 ; CHECK-NEXT: vmov r5, r4, d1
1886 ; CHECK-NEXT: adds r3, r3, r5
1887 ; CHECK-NEXT: adcs r2, r4
1888 ; CHECK-NEXT: adds r0, r0, r3
1889 ; CHECK-NEXT: adcs r1, r2
1890 ; CHECK-NEXT: pop {r4, r5, r7, pc}
1892 %c = icmp eq <8 x i16> %b, zeroinitializer
1893 %xx = sext <8 x i16> %x to <8 x i64>
1894 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1895 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction with accumulate: <2 x i16> zero-extended to
; <2 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: both operands are masked with 0xffff per 64-bit lane, the
; per-lane predicate is materialised with cmp/cset/csetm, the masked value is
; and-ed with it, and the two lanes are combined with add (low words) and orr
; (high words) before the final adds/adcs into r0/r1.
1900 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1901 ; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
1902 ; CHECK: @ %bb.0: @ %entry
1903 ; CHECK-NEXT: .save {r7, lr}
1904 ; CHECK-NEXT: push {r7, lr}
1905 ; CHECK-NEXT: vmov.i64 q2, #0xffff
1906 ; CHECK-NEXT: vand q1, q1, q2
1907 ; CHECK-NEXT: vand q0, q0, q2
1908 ; CHECK-NEXT: vmov r2, s6
1909 ; CHECK-NEXT: vmov r3, s4
1910 ; CHECK-NEXT: cmp r2, #0
1911 ; CHECK-NEXT: cset r2, eq
1912 ; CHECK-NEXT: cmp r2, #0
1913 ; CHECK-NEXT: csetm r2, ne
1914 ; CHECK-NEXT: cmp r3, #0
1915 ; CHECK-NEXT: cset r3, eq
1916 ; CHECK-NEXT: cmp r3, #0
1917 ; CHECK-NEXT: csetm r3, ne
1918 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1919 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1920 ; CHECK-NEXT: vand q0, q0, q1
1921 ; CHECK-NEXT: vmov r12, lr, d1
1922 ; CHECK-NEXT: vmov r2, r3, d0
1923 ; CHECK-NEXT: add r2, r12
1924 ; CHECK-NEXT: orr.w r3, r3, lr
1925 ; CHECK-NEXT: adds r0, r0, r2
1926 ; CHECK-NEXT: adcs r1, r3
1927 ; CHECK-NEXT: pop {r7, pc}
1929 %c = icmp eq <2 x i16> %b, zeroinitializer
1930 %xx = zext <2 x i16> %x to <2 x i64>
1931 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1932 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction with accumulate: <2 x i16> sign-extended to
; <2 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: %b is masked with 0xffff and turned into a per-lane mask
; via cmp/cset/csetm; each lane of %x is sign-extended in scalar registers
; (sxth for the low word, asrs #31 for the high word), masked with vand, and
; the two lanes are summed with adds.w/adc.w before adds/adcs into r0/r1.
1937 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1938 ; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
1939 ; CHECK: @ %bb.0: @ %entry
1940 ; CHECK-NEXT: .save {r7, lr}
1941 ; CHECK-NEXT: push {r7, lr}
1942 ; CHECK-NEXT: vmov.i32 q2, #0xffff
1943 ; CHECK-NEXT: vand q1, q1, q2
1944 ; CHECK-NEXT: vmov r2, s6
1945 ; CHECK-NEXT: vmov r3, s4
1946 ; CHECK-NEXT: cmp r2, #0
1947 ; CHECK-NEXT: cset r2, eq
1948 ; CHECK-NEXT: cmp r2, #0
1949 ; CHECK-NEXT: csetm r2, ne
1950 ; CHECK-NEXT: cmp r3, #0
1951 ; CHECK-NEXT: cset r3, eq
1952 ; CHECK-NEXT: cmp r3, #0
1953 ; CHECK-NEXT: csetm r3, ne
1954 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1955 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1956 ; CHECK-NEXT: vmov r2, s2
1957 ; CHECK-NEXT: vmov r3, s0
1958 ; CHECK-NEXT: sxth r2, r2
1959 ; CHECK-NEXT: sxth r3, r3
1960 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1961 ; CHECK-NEXT: asrs r2, r2, #31
1962 ; CHECK-NEXT: asrs r3, r3, #31
1963 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1964 ; CHECK-NEXT: vand q0, q0, q1
1965 ; CHECK-NEXT: vmov lr, r12, d1
1966 ; CHECK-NEXT: vmov r3, r2, d0
1967 ; CHECK-NEXT: adds.w r3, r3, lr
1968 ; CHECK-NEXT: adc.w r2, r2, r12
1969 ; CHECK-NEXT: adds r0, r0, r3
1970 ; CHECK-NEXT: adcs r1, r2
1971 ; CHECK-NEXT: pop {r7, pc}
1973 %c = icmp eq <2 x i16> %b, zeroinitializer
1974 %xx = sext <2 x i16> %x to <2 x i64>
1975 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1976 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction with accumulate: <16 x i8> zero-extended to
; <16 x i32>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: the compare, extend, select and reduction collapse into a
; single VPT block (vpt.i8 eq) predicating one unsigned vaddvat.u8 on r0.
1981 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1982 ; CHECK-LABEL: add_v16i8_v16i32_acc_zext:
1983 ; CHECK: @ %bb.0: @ %entry
1984 ; CHECK-NEXT: vpt.i8 eq, q1, zr
1985 ; CHECK-NEXT: vaddvat.u8 r0, q0
1988 %c = icmp eq <16 x i8> %b, zeroinitializer
1989 %xx = zext <16 x i8> %x to <16 x i32>
1990 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
1991 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Predicated add reduction with accumulate: <16 x i8> sign-extended to
; <16 x i32>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: a single VPT block (vpt.i8 eq) predicating one signed
; vaddvat.s8 on r0; the signedness of the extension is carried by the .s8
; suffix.
1996 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1997 ; CHECK-LABEL: add_v16i8_v16i32_acc_sext:
1998 ; CHECK: @ %bb.0: @ %entry
1999 ; CHECK-NEXT: vpt.i8 eq, q1, zr
2000 ; CHECK-NEXT: vaddvat.s8 r0, q0
2003 %c = icmp eq <16 x i8> %b, zeroinitializer
2004 %xx = sext <16 x i8> %x to <16 x i32>
2005 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
2006 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Predicated add reduction with accumulate: <4 x i8> zero-extended to
; <4 x i32>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: both operands are masked to their low 8 bits per 32-bit
; lane (vmov.i32 #0xff + vand), then a vpt.i32 eq block predicates a single
; vaddvat.u32 on r0.
2011 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
2012 ; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
2013 ; CHECK: @ %bb.0: @ %entry
2014 ; CHECK-NEXT: vmov.i32 q2, #0xff
2015 ; CHECK-NEXT: vand q0, q0, q2
2016 ; CHECK-NEXT: vand q1, q1, q2
2017 ; CHECK-NEXT: vpt.i32 eq, q1, zr
2018 ; CHECK-NEXT: vaddvat.u32 r0, q0
2021 %c = icmp eq <4 x i8> %b, zeroinitializer
2022 %xx = zext <4 x i8> %x to <4 x i32>
2023 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
2024 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction with accumulate: <4 x i8> sign-extended to
; <4 x i32>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: %x is sign-extended in-register with vmovlb.s8 followed by
; vmovlb.s16, %b is masked to its low 8 bits for the compare, and a vpt.i32 eq
; block predicates a single vaddvat.u32 on r0.
2029 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
2030 ; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
2031 ; CHECK: @ %bb.0: @ %entry
2032 ; CHECK-NEXT: vmovlb.s8 q0, q0
2033 ; CHECK-NEXT: vmov.i32 q2, #0xff
2034 ; CHECK-NEXT: vand q1, q1, q2
2035 ; CHECK-NEXT: vmovlb.s16 q0, q0
2036 ; CHECK-NEXT: vpt.i32 eq, q1, zr
2037 ; CHECK-NEXT: vaddvat.u32 r0, q0
2040 %c = icmp eq <4 x i8> %b, zeroinitializer
2041 %xx = sext <4 x i8> %x to <4 x i32>
2042 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
2043 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction with accumulate: <16 x i8> zero-extended to
; <16 x i16>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: vpt.i8 eq predicating a single vaddvat.u8 on r0, followed
; by uxth to satisfy the zeroext i16 return attribute.
2048 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
2049 ; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
2050 ; CHECK: @ %bb.0: @ %entry
2051 ; CHECK-NEXT: vpt.i8 eq, q1, zr
2052 ; CHECK-NEXT: vaddvat.u8 r0, q0
2053 ; CHECK-NEXT: uxth r0, r0
2056 %c = icmp eq <16 x i8> %b, zeroinitializer
2057 %xx = zext <16 x i8> %x to <16 x i16>
2058 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
2059 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Predicated add reduction with accumulate: <16 x i8> sign-extended to
; <16 x i16>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: vpt.i8 eq predicating a single vaddvat.s8 on r0, followed
; by sxth to satisfy the signext i16 return attribute.
2064 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
2065 ; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
2066 ; CHECK: @ %bb.0: @ %entry
2067 ; CHECK-NEXT: vpt.i8 eq, q1, zr
2068 ; CHECK-NEXT: vaddvat.s8 r0, q0
2069 ; CHECK-NEXT: sxth r0, r0
2072 %c = icmp eq <16 x i8> %b, zeroinitializer
2073 %xx = sext <16 x i8> %x to <16 x i16>
2074 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
2075 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Predicated add reduction with accumulate: <8 x i8> zero-extended to
; <8 x i16>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: both operands are widened in-register with vmovlb.u8, a
; vpt.i16 eq block predicates a single vaddvat.u16 on r0, and uxth satisfies
; the zeroext i16 return attribute.
2080 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2081 ; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
2082 ; CHECK: @ %bb.0: @ %entry
2083 ; CHECK-NEXT: vmovlb.u8 q0, q0
2084 ; CHECK-NEXT: vmovlb.u8 q1, q1
2085 ; CHECK-NEXT: vpt.i16 eq, q1, zr
2086 ; CHECK-NEXT: vaddvat.u16 r0, q0
2087 ; CHECK-NEXT: uxth r0, r0
2090 %c = icmp eq <8 x i8> %b, zeroinitializer
2091 %xx = zext <8 x i8> %x to <8 x i16>
2092 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2093 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated add reduction with accumulate: <8 x i8> sign-extended to
; <8 x i16>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: %x is widened with vmovlb.s8 while %b (only compared
; against zero) is widened with vmovlb.u8; a vpt.i16 eq block predicates a
; single vaddvat.u16 on r0, and sxth satisfies the signext i16 return
; attribute.
2098 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2099 ; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
2100 ; CHECK: @ %bb.0: @ %entry
2101 ; CHECK-NEXT: vmovlb.s8 q0, q0
2102 ; CHECK-NEXT: vmovlb.u8 q1, q1
2103 ; CHECK-NEXT: vpt.i16 eq, q1, zr
2104 ; CHECK-NEXT: vaddvat.u16 r0, q0
2105 ; CHECK-NEXT: sxth r0, r0
2108 %c = icmp eq <8 x i8> %b, zeroinitializer
2109 %xx = sext <8 x i8> %x to <8 x i16>
2110 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2111 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated add reduction with accumulate on <16 x i8> with no extension:
; lanes of %x are kept where the matching lane of %b equals zero.
; Expected output: vpt.i8 eq predicating a single vaddvat.u8 on r0, followed
; by uxtb to satisfy the zeroext i8 return attribute.
2116 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %b, i8 %a) {
2117 ; CHECK-LABEL: add_v16i8_v16i8_acc:
2118 ; CHECK: @ %bb.0: @ %entry
2119 ; CHECK-NEXT: vpt.i8 eq, q1, zr
2120 ; CHECK-NEXT: vaddvat.u8 r0, q0
2121 ; CHECK-NEXT: uxtb r0, r0
2124 %c = icmp eq <16 x i8> %b, zeroinitializer
2125 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
2126 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
; Predicated add reduction with accumulate: <16 x i8> zero-extended to
; <16 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; The expected output is a fully expanded scalar sequence: the i8 predicate is
; widened step by step (i8 -> i16 -> i32 via vpsel/vcmp) for each group of
; lanes, pairs of bytes are extracted with vmov.u8 and masked with 0xff per
; 64-bit lane, and-ed with the per-lane mask, and folded into a 64-bit running
; sum with adds/adc chains that ends by adding into r0/r1. d8-d15 are saved
; and restored around the body.
2131 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2132 ; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
2133 ; CHECK: @ %bb.0: @ %entry
2134 ; CHECK-NEXT: .save {r4, lr}
2135 ; CHECK-NEXT: push {r4, lr}
2136 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
2137 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
2138 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
2139 ; CHECK-NEXT: vmov.i8 q2, #0x0
2140 ; CHECK-NEXT: vmov.i8 q3, #0xff
2141 ; CHECK-NEXT: vpsel q4, q3, q2
2142 ; CHECK-NEXT: vmov.u8 r2, q4[0]
2143 ; CHECK-NEXT: vmov.16 q1[0], r2
2144 ; CHECK-NEXT: vmov.u8 r2, q4[1]
2145 ; CHECK-NEXT: vmov.16 q1[1], r2
2146 ; CHECK-NEXT: vmov.u8 r2, q4[2]
2147 ; CHECK-NEXT: vmov.16 q1[2], r2
2148 ; CHECK-NEXT: vmov.u8 r2, q4[3]
2149 ; CHECK-NEXT: vmov.16 q1[3], r2
2150 ; CHECK-NEXT: vmov.u8 r2, q4[4]
2151 ; CHECK-NEXT: vmov.16 q1[4], r2
2152 ; CHECK-NEXT: vmov.u8 r2, q4[5]
2153 ; CHECK-NEXT: vmov.16 q1[5], r2
2154 ; CHECK-NEXT: vmov.u8 r2, q4[6]
2155 ; CHECK-NEXT: vmov.16 q1[6], r2
2156 ; CHECK-NEXT: vmov.u8 r2, q4[7]
2157 ; CHECK-NEXT: vmov.16 q1[7], r2
2158 ; CHECK-NEXT: vcmp.i16 ne, q1, zr
2159 ; CHECK-NEXT: vpsel q5, q3, q2
2160 ; CHECK-NEXT: vmov.u16 r2, q5[2]
2161 ; CHECK-NEXT: vmov.u16 r3, q5[0]
2162 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
2163 ; CHECK-NEXT: vmov.u16 r2, q5[3]
2164 ; CHECK-NEXT: vmov.u16 r3, q5[1]
2165 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
2166 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
2167 ; CHECK-NEXT: vmov.i64 q1, #0xff
2168 ; CHECK-NEXT: vmrs r2, p0
2169 ; CHECK-NEXT: ubfx r3, r2, #4, #1
2170 ; CHECK-NEXT: rsb.w r12, r3, #0
2171 ; CHECK-NEXT: and r3, r2, #1
2172 ; CHECK-NEXT: rsbs r3, r3, #0
2173 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r12
2174 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r12
2175 ; CHECK-NEXT: vmov.u8 r12, q0[1]
2176 ; CHECK-NEXT: vmov.u8 r3, q0[0]
2177 ; CHECK-NEXT: vmov q7[2], q7[0], r3, r12
2178 ; CHECK-NEXT: vand q7, q7, q1
2179 ; CHECK-NEXT: vand q6, q7, q6
2180 ; CHECK-NEXT: vmov r12, lr, d13
2181 ; CHECK-NEXT: vmov r3, r4, d12
2182 ; CHECK-NEXT: orr.w lr, lr, r4
2183 ; CHECK-NEXT: ubfx r4, r2, #12, #1
2184 ; CHECK-NEXT: ubfx r2, r2, #8, #1
2185 ; CHECK-NEXT: rsbs r4, r4, #0
2186 ; CHECK-NEXT: rsbs r2, r2, #0
2187 ; CHECK-NEXT: add r3, r12
2188 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r4
2189 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r4
2190 ; CHECK-NEXT: vmov.u8 r2, q0[3]
2191 ; CHECK-NEXT: vmov.u8 r4, q0[2]
2192 ; CHECK-NEXT: vmov q7[2], q7[0], r4, r2
2193 ; CHECK-NEXT: vand q7, q7, q1
2194 ; CHECK-NEXT: vand q6, q7, q6
2195 ; CHECK-NEXT: vmov r2, r4, d12
2196 ; CHECK-NEXT: adds.w r12, r3, r2
2197 ; CHECK-NEXT: adc.w r3, lr, r4
2198 ; CHECK-NEXT: vmov r4, r2, d13
2199 ; CHECK-NEXT: adds.w lr, r12, r4
2200 ; CHECK-NEXT: adc.w r12, r3, r2
2201 ; CHECK-NEXT: vmov.u16 r3, q5[6]
2202 ; CHECK-NEXT: vmov.u16 r2, q5[4]
2203 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
2204 ; CHECK-NEXT: vmov.u16 r2, q5[7]
2205 ; CHECK-NEXT: vmov.u16 r3, q5[5]
2206 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
2207 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
2208 ; CHECK-NEXT: vmrs r2, p0
2209 ; CHECK-NEXT: and r4, r2, #1
2210 ; CHECK-NEXT: ubfx r3, r2, #4, #1
2211 ; CHECK-NEXT: rsbs r4, r4, #0
2212 ; CHECK-NEXT: rsbs r3, r3, #0
2213 ; CHECK-NEXT: vmov q5[2], q5[0], r4, r3
2214 ; CHECK-NEXT: vmov q5[3], q5[1], r4, r3
2215 ; CHECK-NEXT: vmov.u8 r3, q0[5]
2216 ; CHECK-NEXT: vmov.u8 r4, q0[4]
2217 ; CHECK-NEXT: vmov q6[2], q6[0], r4, r3
2218 ; CHECK-NEXT: vand q6, q6, q1
2219 ; CHECK-NEXT: vand q5, q6, q5
2220 ; CHECK-NEXT: vmov r3, r4, d10
2221 ; CHECK-NEXT: adds.w lr, lr, r3
2222 ; CHECK-NEXT: adc.w r12, r12, r4
2223 ; CHECK-NEXT: vmov r3, r4, d11
2224 ; CHECK-NEXT: adds.w lr, lr, r3
2225 ; CHECK-NEXT: ubfx r3, r2, #12, #1
2226 ; CHECK-NEXT: ubfx r2, r2, #8, #1
2227 ; CHECK-NEXT: rsb.w r3, r3, #0
2228 ; CHECK-NEXT: rsb.w r2, r2, #0
2229 ; CHECK-NEXT: adc.w r4, r4, r12
2230 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
2231 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
2232 ; CHECK-NEXT: vmov.u8 r2, q0[7]
2233 ; CHECK-NEXT: vmov.u8 r3, q0[6]
2234 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
2235 ; CHECK-NEXT: vand q6, q6, q1
2236 ; CHECK-NEXT: vand q5, q6, q5
2237 ; CHECK-NEXT: vmov r2, r3, d10
2238 ; CHECK-NEXT: adds.w r12, lr, r2
2239 ; CHECK-NEXT: adcs r3, r4
2240 ; CHECK-NEXT: vmov r4, r2, d11
2241 ; CHECK-NEXT: adds.w lr, r12, r4
2242 ; CHECK-NEXT: adc.w r12, r3, r2
2243 ; CHECK-NEXT: vmov.u8 r2, q4[8]
2244 ; CHECK-NEXT: vmov.16 q5[0], r2
2245 ; CHECK-NEXT: vmov.u8 r2, q4[9]
2246 ; CHECK-NEXT: vmov.16 q5[1], r2
2247 ; CHECK-NEXT: vmov.u8 r2, q4[10]
2248 ; CHECK-NEXT: vmov.16 q5[2], r2
2249 ; CHECK-NEXT: vmov.u8 r2, q4[11]
2250 ; CHECK-NEXT: vmov.16 q5[3], r2
2251 ; CHECK-NEXT: vmov.u8 r2, q4[12]
2252 ; CHECK-NEXT: vmov.16 q5[4], r2
2253 ; CHECK-NEXT: vmov.u8 r2, q4[13]
2254 ; CHECK-NEXT: vmov.16 q5[5], r2
2255 ; CHECK-NEXT: vmov.u8 r2, q4[14]
2256 ; CHECK-NEXT: vmov.16 q5[6], r2
2257 ; CHECK-NEXT: vmov.u8 r2, q4[15]
2258 ; CHECK-NEXT: vmov.16 q5[7], r2
2259 ; CHECK-NEXT: vcmp.i16 ne, q5, zr
2260 ; CHECK-NEXT: vpsel q2, q3, q2
2261 ; CHECK-NEXT: vmov.u16 r2, q2[2]
2262 ; CHECK-NEXT: vmov.u16 r3, q2[0]
2263 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
2264 ; CHECK-NEXT: vmov.u16 r2, q2[3]
2265 ; CHECK-NEXT: vmov.u16 r3, q2[1]
2266 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
2267 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2268 ; CHECK-NEXT: vmrs r2, p0
2269 ; CHECK-NEXT: and r4, r2, #1
2270 ; CHECK-NEXT: ubfx r3, r2, #4, #1
2271 ; CHECK-NEXT: rsbs r4, r4, #0
2272 ; CHECK-NEXT: rsbs r3, r3, #0
2273 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
2274 ; CHECK-NEXT: vmov q3[3], q3[1], r4, r3
2275 ; CHECK-NEXT: vmov.u8 r3, q0[9]
2276 ; CHECK-NEXT: vmov.u8 r4, q0[8]
2277 ; CHECK-NEXT: vmov q4[2], q4[0], r4, r3
2278 ; CHECK-NEXT: vand q4, q4, q1
2279 ; CHECK-NEXT: vand q3, q4, q3
2280 ; CHECK-NEXT: vmov r3, r4, d6
2281 ; CHECK-NEXT: adds.w lr, lr, r3
2282 ; CHECK-NEXT: adc.w r12, r12, r4
2283 ; CHECK-NEXT: vmov r3, r4, d7
2284 ; CHECK-NEXT: adds.w lr, lr, r3
2285 ; CHECK-NEXT: ubfx r3, r2, #12, #1
2286 ; CHECK-NEXT: ubfx r2, r2, #8, #1
2287 ; CHECK-NEXT: rsb.w r3, r3, #0
2288 ; CHECK-NEXT: rsb.w r2, r2, #0
2289 ; CHECK-NEXT: adc.w r4, r4, r12
2290 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
2291 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
2292 ; CHECK-NEXT: vmov.u8 r2, q0[11]
2293 ; CHECK-NEXT: vmov.u8 r3, q0[10]
2294 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
2295 ; CHECK-NEXT: vand q4, q4, q1
2296 ; CHECK-NEXT: vand q3, q4, q3
2297 ; CHECK-NEXT: vmov r2, r3, d6
2298 ; CHECK-NEXT: adds.w r12, lr, r2
2299 ; CHECK-NEXT: adcs r3, r4
2300 ; CHECK-NEXT: vmov r4, r2, d7
2301 ; CHECK-NEXT: adds.w lr, r12, r4
2302 ; CHECK-NEXT: adc.w r12, r3, r2
2303 ; CHECK-NEXT: vmov.u16 r3, q2[6]
2304 ; CHECK-NEXT: vmov.u16 r2, q2[4]
2305 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
2306 ; CHECK-NEXT: vmov.u16 r2, q2[7]
2307 ; CHECK-NEXT: vmov.u16 r3, q2[5]
2308 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
2309 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2310 ; CHECK-NEXT: vmrs r2, p0
2311 ; CHECK-NEXT: and r4, r2, #1
2312 ; CHECK-NEXT: ubfx r3, r2, #4, #1
2313 ; CHECK-NEXT: rsbs r4, r4, #0
2314 ; CHECK-NEXT: rsbs r3, r3, #0
2315 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r3
2316 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r3
2317 ; CHECK-NEXT: vmov.u8 r3, q0[13]
2318 ; CHECK-NEXT: vmov.u8 r4, q0[12]
2319 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
2320 ; CHECK-NEXT: vand q3, q3, q1
2321 ; CHECK-NEXT: vand q2, q3, q2
2322 ; CHECK-NEXT: vmov r3, r4, d4
2323 ; CHECK-NEXT: adds.w lr, lr, r3
2324 ; CHECK-NEXT: adc.w r12, r12, r4
2325 ; CHECK-NEXT: vmov r3, r4, d5
2326 ; CHECK-NEXT: adds.w lr, lr, r3
2327 ; CHECK-NEXT: ubfx r3, r2, #12, #1
2328 ; CHECK-NEXT: ubfx r2, r2, #8, #1
2329 ; CHECK-NEXT: rsb.w r3, r3, #0
2330 ; CHECK-NEXT: rsb.w r2, r2, #0
2331 ; CHECK-NEXT: adc.w r4, r4, r12
2332 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
2333 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
2334 ; CHECK-NEXT: vmov.u8 r2, q0[15]
2335 ; CHECK-NEXT: vmov.u8 r3, q0[14]
2336 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2337 ; CHECK-NEXT: vand q0, q0, q1
2338 ; CHECK-NEXT: vand q0, q0, q2
2339 ; CHECK-NEXT: vmov r2, r3, d0
2340 ; CHECK-NEXT: adds.w r12, lr, r2
2341 ; CHECK-NEXT: adcs r3, r4
2342 ; CHECK-NEXT: vmov r4, r2, d1
2343 ; CHECK-NEXT: adds.w r4, r4, r12
2344 ; CHECK-NEXT: adcs r2, r3
2345 ; CHECK-NEXT: adds r0, r0, r4
2346 ; CHECK-NEXT: adcs r1, r2
2347 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
2348 ; CHECK-NEXT: pop {r4, pc}
2350 %c = icmp eq <16 x i8> %b, zeroinitializer
2351 %xx = zext <16 x i8> %x to <16 x i64>
2352 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2353 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction with accumulate: <16 x i8> sign-extended to
; <16 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; The expected output is a fully expanded scalar sequence: the i8 predicate is
; widened step by step (i8 -> i16 -> i32 via vpsel/vcmp) for each group of
; lanes, pairs of bytes are extracted with vmov.s8 (high words built with
; asrs #31), and-ed with the per-lane mask, and folded into a 64-bit running
; sum with adds/adc chains that ends by adding into r0/r1. d8-d13 are saved
; and restored around the body.
2358 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2359 ; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
2360 ; CHECK: @ %bb.0: @ %entry
2361 ; CHECK-NEXT: .save {r4, r5, r7, lr}
2362 ; CHECK-NEXT: push {r4, r5, r7, lr}
2363 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
2364 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
2365 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
2366 ; CHECK-NEXT: vmov.i8 q1, #0x0
2367 ; CHECK-NEXT: vmov.i8 q2, #0xff
2368 ; CHECK-NEXT: vpsel q3, q2, q1
2369 ; CHECK-NEXT: vmov.u8 r2, q3[0]
2370 ; CHECK-NEXT: vmov.16 q4[0], r2
2371 ; CHECK-NEXT: vmov.u8 r2, q3[1]
2372 ; CHECK-NEXT: vmov.16 q4[1], r2
2373 ; CHECK-NEXT: vmov.u8 r2, q3[2]
2374 ; CHECK-NEXT: vmov.16 q4[2], r2
2375 ; CHECK-NEXT: vmov.u8 r2, q3[3]
2376 ; CHECK-NEXT: vmov.16 q4[3], r2
2377 ; CHECK-NEXT: vmov.u8 r2, q3[4]
2378 ; CHECK-NEXT: vmov.16 q4[4], r2
2379 ; CHECK-NEXT: vmov.u8 r2, q3[5]
2380 ; CHECK-NEXT: vmov.16 q4[5], r2
2381 ; CHECK-NEXT: vmov.u8 r2, q3[6]
2382 ; CHECK-NEXT: vmov.16 q4[6], r2
2383 ; CHECK-NEXT: vmov.u8 r2, q3[7]
2384 ; CHECK-NEXT: vmov.16 q4[7], r2
2385 ; CHECK-NEXT: vcmp.i16 ne, q4, zr
2386 ; CHECK-NEXT: vpsel q4, q2, q1
2387 ; CHECK-NEXT: vmov.u16 r2, q4[2]
2388 ; CHECK-NEXT: vmov.u16 r3, q4[0]
2389 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
2390 ; CHECK-NEXT: vmov.u16 r2, q4[3]
2391 ; CHECK-NEXT: vmov.u16 r3, q4[1]
2392 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
2393 ; CHECK-NEXT: vmov.s8 r2, q0[0]
2394 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2395 ; CHECK-NEXT: vmrs lr, p0
2396 ; CHECK-NEXT: ubfx r3, lr, #4, #1
2397 ; CHECK-NEXT: rsb.w r12, r3, #0
2398 ; CHECK-NEXT: and r3, lr, #1
2399 ; CHECK-NEXT: rsbs r3, r3, #0
2400 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r12
2401 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r12
2402 ; CHECK-NEXT: vmov.s8 r3, q0[1]
2403 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
2404 ; CHECK-NEXT: asrs r3, r3, #31
2405 ; CHECK-NEXT: asrs r2, r2, #31
2406 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
2407 ; CHECK-NEXT: vand q5, q6, q5
2408 ; CHECK-NEXT: vmov r2, r12, d11
2409 ; CHECK-NEXT: vmov r3, r4, d10
2410 ; CHECK-NEXT: adds r5, r3, r2
2411 ; CHECK-NEXT: ubfx r2, lr, #8, #1
2412 ; CHECK-NEXT: adc.w r3, r4, r12
2413 ; CHECK-NEXT: ubfx r4, lr, #12, #1
2414 ; CHECK-NEXT: rsbs r4, r4, #0
2415 ; CHECK-NEXT: rsbs r2, r2, #0
2416 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r4
2417 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r4
2418 ; CHECK-NEXT: vmov.s8 r2, q0[3]
2419 ; CHECK-NEXT: vmov.s8 r4, q0[2]
2420 ; CHECK-NEXT: vmov q6[2], q6[0], r4, r2
2421 ; CHECK-NEXT: asrs r2, r2, #31
2422 ; CHECK-NEXT: asrs r4, r4, #31
2423 ; CHECK-NEXT: vmov q6[3], q6[1], r4, r2
2424 ; CHECK-NEXT: vand q5, q6, q5
2425 ; CHECK-NEXT: vmov r2, r4, d10
2426 ; CHECK-NEXT: adds r2, r2, r5
2427 ; CHECK-NEXT: adcs r3, r4
2428 ; CHECK-NEXT: vmov r5, r4, d11
2429 ; CHECK-NEXT: adds.w r12, r2, r5
2430 ; CHECK-NEXT: vmov.u16 r5, q4[6]
2431 ; CHECK-NEXT: adcs r3, r4
2432 ; CHECK-NEXT: vmov.u16 r4, q4[4]
2433 ; CHECK-NEXT: vmov q5[2], q5[0], r4, r5
2434 ; CHECK-NEXT: vmov.u16 r5, q4[7]
2435 ; CHECK-NEXT: vmov.u16 r4, q4[5]
2436 ; CHECK-NEXT: vmov q5[3], q5[1], r4, r5
2437 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2438 ; CHECK-NEXT: vmrs r5, p0
2439 ; CHECK-NEXT: and r2, r5, #1
2440 ; CHECK-NEXT: ubfx r4, r5, #4, #1
2441 ; CHECK-NEXT: rsbs r2, r2, #0
2442 ; CHECK-NEXT: rsbs r4, r4, #0
2443 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r4
2444 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r4
2445 ; CHECK-NEXT: vmov.s8 r2, q0[5]
2446 ; CHECK-NEXT: vmov.s8 r4, q0[4]
2447 ; CHECK-NEXT: vmov q5[2], q5[0], r4, r2
2448 ; CHECK-NEXT: asrs r2, r2, #31
2449 ; CHECK-NEXT: asrs r4, r4, #31
2450 ; CHECK-NEXT: vmov q5[3], q5[1], r4, r2
2451 ; CHECK-NEXT: vand q4, q5, q4
2452 ; CHECK-NEXT: vmov r2, r4, d8
2453 ; CHECK-NEXT: adds.w r12, r12, r2
2454 ; CHECK-NEXT: adcs r3, r4
2455 ; CHECK-NEXT: vmov r4, r2, d9
2456 ; CHECK-NEXT: adds.w r4, r4, r12
2457 ; CHECK-NEXT: adcs r2, r3
2458 ; CHECK-NEXT: ubfx r3, r5, #12, #1
2459 ; CHECK-NEXT: ubfx r5, r5, #8, #1
2460 ; CHECK-NEXT: rsbs r3, r3, #0
2461 ; CHECK-NEXT: rsbs r5, r5, #0
2462 ; CHECK-NEXT: vmov q4[2], q4[0], r5, r3
2463 ; CHECK-NEXT: vmov q4[3], q4[1], r5, r3
2464 ; CHECK-NEXT: vmov.s8 r3, q0[7]
2465 ; CHECK-NEXT: vmov.s8 r5, q0[6]
2466 ; CHECK-NEXT: vmov q5[2], q5[0], r5, r3
2467 ; CHECK-NEXT: asrs r3, r3, #31
2468 ; CHECK-NEXT: asrs r5, r5, #31
2469 ; CHECK-NEXT: vmov q5[3], q5[1], r5, r3
2470 ; CHECK-NEXT: vand q4, q5, q4
2471 ; CHECK-NEXT: vmov r3, r5, d8
2472 ; CHECK-NEXT: adds r3, r3, r4
2473 ; CHECK-NEXT: adcs r5, r2
2474 ; CHECK-NEXT: vmov r2, r4, d9
2475 ; CHECK-NEXT: adds.w r12, r3, r2
2476 ; CHECK-NEXT: adc.w r3, r5, r4
2477 ; CHECK-NEXT: vmov.u8 r5, q3[8]
2478 ; CHECK-NEXT: vmov.16 q4[0], r5
2479 ; CHECK-NEXT: vmov.u8 r5, q3[9]
2480 ; CHECK-NEXT: vmov.16 q4[1], r5
2481 ; CHECK-NEXT: vmov.u8 r5, q3[10]
2482 ; CHECK-NEXT: vmov.16 q4[2], r5
2483 ; CHECK-NEXT: vmov.u8 r5, q3[11]
2484 ; CHECK-NEXT: vmov.16 q4[3], r5
2485 ; CHECK-NEXT: vmov.u8 r5, q3[12]
2486 ; CHECK-NEXT: vmov.16 q4[4], r5
2487 ; CHECK-NEXT: vmov.u8 r5, q3[13]
2488 ; CHECK-NEXT: vmov.16 q4[5], r5
2489 ; CHECK-NEXT: vmov.u8 r5, q3[14]
2490 ; CHECK-NEXT: vmov.16 q4[6], r5
2491 ; CHECK-NEXT: vmov.u8 r5, q3[15]
2492 ; CHECK-NEXT: vmov.16 q4[7], r5
2493 ; CHECK-NEXT: vcmp.i16 ne, q4, zr
2494 ; CHECK-NEXT: vpsel q1, q2, q1
2495 ; CHECK-NEXT: vmov.u16 r5, q1[2]
2496 ; CHECK-NEXT: vmov.u16 r4, q1[0]
2497 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r5
2498 ; CHECK-NEXT: vmov.u16 r5, q1[3]
2499 ; CHECK-NEXT: vmov.u16 r4, q1[1]
2500 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r5
2501 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
2502 ; CHECK-NEXT: vmrs r5, p0
2503 ; CHECK-NEXT: and r2, r5, #1
2504 ; CHECK-NEXT: ubfx r4, r5, #4, #1
2505 ; CHECK-NEXT: rsbs r2, r2, #0
2506 ; CHECK-NEXT: rsbs r4, r4, #0
2507 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
2508 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
2509 ; CHECK-NEXT: vmov.s8 r2, q0[9]
2510 ; CHECK-NEXT: vmov.s8 r4, q0[8]
2511 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
2512 ; CHECK-NEXT: asrs r2, r2, #31
2513 ; CHECK-NEXT: asrs r4, r4, #31
2514 ; CHECK-NEXT: vmov q3[3], q3[1], r4, r2
2515 ; CHECK-NEXT: vand q2, q3, q2
2516 ; CHECK-NEXT: vmov r2, r4, d4
2517 ; CHECK-NEXT: adds.w r12, r12, r2
2518 ; CHECK-NEXT: adcs r3, r4
2519 ; CHECK-NEXT: vmov r4, r2, d5
2520 ; CHECK-NEXT: adds.w r4, r4, r12
2521 ; CHECK-NEXT: adcs r2, r3
2522 ; CHECK-NEXT: ubfx r3, r5, #12, #1
2523 ; CHECK-NEXT: ubfx r5, r5, #8, #1
2524 ; CHECK-NEXT: rsbs r3, r3, #0
2525 ; CHECK-NEXT: rsbs r5, r5, #0
2526 ; CHECK-NEXT: vmov q2[2], q2[0], r5, r3
2527 ; CHECK-NEXT: vmov q2[3], q2[1], r5, r3
2528 ; CHECK-NEXT: vmov.s8 r3, q0[11]
2529 ; CHECK-NEXT: vmov.s8 r5, q0[10]
2530 ; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
2531 ; CHECK-NEXT: asrs r3, r3, #31
2532 ; CHECK-NEXT: asrs r5, r5, #31
2533 ; CHECK-NEXT: vmov q3[3], q3[1], r5, r3
2534 ; CHECK-NEXT: vand q2, q3, q2
2535 ; CHECK-NEXT: vmov r3, r5, d4
2536 ; CHECK-NEXT: adds r3, r3, r4
2537 ; CHECK-NEXT: adcs r5, r2
2538 ; CHECK-NEXT: vmov r2, r4, d5
2539 ; CHECK-NEXT: adds.w r12, r3, r2
2540 ; CHECK-NEXT: adc.w r3, r5, r4
2541 ; CHECK-NEXT: vmov.u16 r5, q1[6]
2542 ; CHECK-NEXT: vmov.u16 r4, q1[4]
2543 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r5
2544 ; CHECK-NEXT: vmov.u16 r5, q1[7]
2545 ; CHECK-NEXT: vmov.u16 r4, q1[5]
2546 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r5
2547 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
2548 ; CHECK-NEXT: vmrs r5, p0
2549 ; CHECK-NEXT: and r2, r5, #1
2550 ; CHECK-NEXT: ubfx r4, r5, #4, #1
2551 ; CHECK-NEXT: rsbs r2, r2, #0
2552 ; CHECK-NEXT: rsbs r4, r4, #0
2553 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
2554 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
2555 ; CHECK-NEXT: vmov.s8 r2, q0[13]
2556 ; CHECK-NEXT: vmov.s8 r4, q0[12]
2557 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
2558 ; CHECK-NEXT: asrs r2, r2, #31
2559 ; CHECK-NEXT: asrs r4, r4, #31
2560 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r2
2561 ; CHECK-NEXT: vand q1, q2, q1
2562 ; CHECK-NEXT: vmov r2, r4, d2
2563 ; CHECK-NEXT: adds.w r12, r12, r2
2564 ; CHECK-NEXT: adcs r3, r4
2565 ; CHECK-NEXT: vmov r4, r2, d3
2566 ; CHECK-NEXT: adds.w r4, r4, r12
2567 ; CHECK-NEXT: adcs r2, r3
2568 ; CHECK-NEXT: ubfx r3, r5, #12, #1
2569 ; CHECK-NEXT: ubfx r5, r5, #8, #1
2570 ; CHECK-NEXT: rsbs r3, r3, #0
2571 ; CHECK-NEXT: rsbs r5, r5, #0
2572 ; CHECK-NEXT: vmov q1[2], q1[0], r5, r3
2573 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r3
2574 ; CHECK-NEXT: vmov.s8 r3, q0[15]
2575 ; CHECK-NEXT: vmov.s8 r5, q0[14]
2576 ; CHECK-NEXT: vmov q0[2], q0[0], r5, r3
2577 ; CHECK-NEXT: asrs r3, r3, #31
2578 ; CHECK-NEXT: asrs r5, r5, #31
2579 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
2580 ; CHECK-NEXT: vand q0, q0, q1
2581 ; CHECK-NEXT: vmov r3, r5, d0
2582 ; CHECK-NEXT: adds r3, r3, r4
2583 ; CHECK-NEXT: adcs r2, r5
2584 ; CHECK-NEXT: vmov r5, r4, d1
2585 ; CHECK-NEXT: adds r3, r3, r5
2586 ; CHECK-NEXT: adcs r2, r4
2587 ; CHECK-NEXT: adds r0, r0, r3
2588 ; CHECK-NEXT: adcs r1, r2
2589 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
2590 ; CHECK-NEXT: pop {r4, r5, r7, pc}
2592 %c = icmp eq <16 x i8> %b, zeroinitializer
2593 %xx = sext <16 x i8> %x to <16 x i64>
2594 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2595 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction with accumulate: <2 x i8> zero-extended to
; <2 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: both operands are masked with 0xff per 64-bit lane, the
; per-lane predicate is materialised with cmp/cset/csetm, the masked value is
; and-ed with it, and the two lanes are combined with add (low words) and orr
; (high words) before the final adds/adcs into r0/r1.
2600 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2601 ; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
2602 ; CHECK: @ %bb.0: @ %entry
2603 ; CHECK-NEXT: .save {r7, lr}
2604 ; CHECK-NEXT: push {r7, lr}
2605 ; CHECK-NEXT: vmov.i64 q2, #0xff
2606 ; CHECK-NEXT: vand q1, q1, q2
2607 ; CHECK-NEXT: vand q0, q0, q2
2608 ; CHECK-NEXT: vmov r2, s6
2609 ; CHECK-NEXT: vmov r3, s4
2610 ; CHECK-NEXT: cmp r2, #0
2611 ; CHECK-NEXT: cset r2, eq
2612 ; CHECK-NEXT: cmp r2, #0
2613 ; CHECK-NEXT: csetm r2, ne
2614 ; CHECK-NEXT: cmp r3, #0
2615 ; CHECK-NEXT: cset r3, eq
2616 ; CHECK-NEXT: cmp r3, #0
2617 ; CHECK-NEXT: csetm r3, ne
2618 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
2619 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
2620 ; CHECK-NEXT: vand q0, q0, q1
2621 ; CHECK-NEXT: vmov r12, lr, d1
2622 ; CHECK-NEXT: vmov r2, r3, d0
2623 ; CHECK-NEXT: add r2, r12
2624 ; CHECK-NEXT: orr.w r3, r3, lr
2625 ; CHECK-NEXT: adds r0, r0, r2
2626 ; CHECK-NEXT: adcs r1, r3
2627 ; CHECK-NEXT: pop {r7, pc}
2629 %c = icmp eq <2 x i8> %b, zeroinitializer
2630 %xx = zext <2 x i8> %x to <2 x i64>
2631 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2632 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction with accumulate: <2 x i8> sign-extended to
; <2 x i64>, keeping lanes of %x where the matching lane of %b equals zero.
; Expected output: %b is masked with 0xff and turned into a per-lane mask via
; cmp/cset/csetm; each lane of %x is sign-extended in scalar registers (sxtb
; for the low word, asrs #31 for the high word), masked with vand, and the two
; lanes are summed with adds.w/adc.w before adds/adcs into r0/r1.
2637 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2638 ; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
2639 ; CHECK: @ %bb.0: @ %entry
2640 ; CHECK-NEXT: .save {r7, lr}
2641 ; CHECK-NEXT: push {r7, lr}
2642 ; CHECK-NEXT: vmov.i32 q2, #0xff
2643 ; CHECK-NEXT: vand q1, q1, q2
2644 ; CHECK-NEXT: vmov r2, s6
2645 ; CHECK-NEXT: vmov r3, s4
2646 ; CHECK-NEXT: cmp r2, #0
2647 ; CHECK-NEXT: cset r2, eq
2648 ; CHECK-NEXT: cmp r2, #0
2649 ; CHECK-NEXT: csetm r2, ne
2650 ; CHECK-NEXT: cmp r3, #0
2651 ; CHECK-NEXT: cset r3, eq
2652 ; CHECK-NEXT: cmp r3, #0
2653 ; CHECK-NEXT: csetm r3, ne
2654 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
2655 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
2656 ; CHECK-NEXT: vmov r2, s2
2657 ; CHECK-NEXT: vmov r3, s0
2658 ; CHECK-NEXT: sxtb r2, r2
2659 ; CHECK-NEXT: sxtb r3, r3
2660 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2661 ; CHECK-NEXT: asrs r2, r2, #31
2662 ; CHECK-NEXT: asrs r3, r3, #31
2663 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2664 ; CHECK-NEXT: vand q0, q0, q1
2665 ; CHECK-NEXT: vmov lr, r12, d1
2666 ; CHECK-NEXT: vmov r3, r2, d0
2667 ; CHECK-NEXT: adds.w r3, r3, lr
2668 ; CHECK-NEXT: adc.w r2, r2, r12
2669 ; CHECK-NEXT: adds r0, r0, r3
2670 ; CHECK-NEXT: adcs r1, r2
2671 ; CHECK-NEXT: pop {r7, pc}
2673 %c = icmp eq <2 x i8> %b, zeroinitializer
2674 %xx = sext <2 x i8> %x to <2 x i64>
2675 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2676 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction with accumulate on <2 x i64> with no extension:
; lanes of %x are kept where the matching 64-bit lane of %b equals zero.
; Expected output: each 64-bit lane of %b is tested by or-ing its two halves
; (orrs) and turned into an all-ones/all-zeros mask with cset/csetm; %x is
; and-ed with the mask, the two lanes are summed with adds.w/adc.w, and the
; result is added into r0/r1 with adds/adcs.
2681 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %b, i64 %a) {
2682 ; CHECK-LABEL: add_v2i64_v2i64_acc:
2683 ; CHECK: @ %bb.0: @ %entry
2684 ; CHECK-NEXT: .save {r7, lr}
2685 ; CHECK-NEXT: push {r7, lr}
2686 ; CHECK-NEXT: vmov r2, r3, d3
2687 ; CHECK-NEXT: orrs r2, r3
2688 ; CHECK-NEXT: cset r2, eq
2689 ; CHECK-NEXT: cmp r2, #0
2690 ; CHECK-NEXT: vmov r3, r2, d2
2691 ; CHECK-NEXT: csetm r12, ne
2692 ; CHECK-NEXT: orrs r2, r3
2693 ; CHECK-NEXT: cset r2, eq
2694 ; CHECK-NEXT: cmp r2, #0
2695 ; CHECK-NEXT: csetm r2, ne
2696 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r12
2697 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
2698 ; CHECK-NEXT: vand q0, q0, q1
2699 ; CHECK-NEXT: vmov lr, r12, d1
2700 ; CHECK-NEXT: vmov r3, r2, d0
2701 ; CHECK-NEXT: adds.w r3, r3, lr
2702 ; CHECK-NEXT: adc.w r2, r2, r12
2703 ; CHECK-NEXT: adds r0, r0, r3
2704 ; CHECK-NEXT: adcs r1, r2
2705 ; CHECK-NEXT: pop {r7, pc}
2707 %c = icmp eq <2 x i64> %b, zeroinitializer
2708 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
2709 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Declarations of the llvm.vector.reduce.add intrinsic overloads called by the
; tests in this file, one per vector type (element widths i8/i16/i32/i64).
2714 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
2715 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
2716 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
2717 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
2718 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
2719 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
2720 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
2721 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
2722 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
2723 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)