1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; Predicated add reduction over <4 x i32>: lanes of %x whose %b lane compares
; equal to zero are kept, every other lane is replaced by 0, and the selected
; vector is summed into %z. The CHECK lines expect a VPT block (vpt.i32 eq)
; wrapping a single predicated vaddvt.u32.
4 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %b) {
5 ; CHECK-LABEL: add_v4i32_v4i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vpt.i32 eq, q1, zr
8 ; CHECK-NEXT: vaddvt.u32 r0, q0
11 %c = icmp eq <4 x i32> %b, zeroinitializer
12 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
13 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated widening add reduction: %x is zero-extended from <4 x i32> to
; <4 x i64>, lanes where %b != 0 are replaced by 0, and the result is summed
; into the i64 %z. The CHECK lines expect vpt.i32 eq followed by a predicated
; vaddlvt.u32 that writes the r0/r1 register pair.
17 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %b) {
18 ; CHECK-LABEL: add_v4i32_v4i64_zext:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vpt.i32 eq, q1, zr
21 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
24 %c = icmp eq <4 x i32> %b, zeroinitializer
25 %xx = zext <4 x i32> %x to <4 x i64>
26 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
27 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Signed counterpart of the <4 x i32> -> i64 predicated reduction: %x is
; sign-extended to <4 x i64> before the select on (%b == 0) and the add
; reduction. The CHECK lines expect vpt.i32 eq followed by a predicated
; vaddlvt.s32 into r0/r1.
31 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %b) {
32 ; CHECK-LABEL: add_v4i32_v4i64_sext:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vpt.i32 eq, q1, zr
35 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
38 %c = icmp eq <4 x i32> %b, zeroinitializer
39 %xx = sext <4 x i32> %x to <4 x i64>
40 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
41 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Two-lane variant: %x is zero-extended from <2 x i32> to <2 x i64>, lanes
; where %b != 0 are zeroed, and the two i64 lanes are summed into %z.
; The expected code uses no vaddlv here. It masks q0 with 0xffffffff per
; 64-bit element, assembles the predicate in r1 with cmp/csetm/bfi, moves it
; to p0 with vmsr, selects with vpsel, and adds the two 64-bit lanes with
; adds/adcs.
45 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
46 ; CHECK-LABEL: add_v2i32_v2i64_zext:
47 ; CHECK: @ %bb.0: @ %entry
48 ; CHECK-NEXT: vmov r0, s4
49 ; CHECK-NEXT: movs r1, #0
50 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
51 ; CHECK-NEXT: vand q0, q0, q2
52 ; CHECK-NEXT: cmp r0, #0
53 ; CHECK-NEXT: csetm r0, eq
54 ; CHECK-NEXT: bfi r1, r0, #0, #8
55 ; CHECK-NEXT: vmov r0, s6
56 ; CHECK-NEXT: vmov.i32 q1, #0x0
57 ; CHECK-NEXT: cmp r0, #0
58 ; CHECK-NEXT: csetm r0, eq
59 ; CHECK-NEXT: bfi r1, r0, #8, #8
60 ; CHECK-NEXT: vmsr p0, r1
61 ; CHECK-NEXT: vpsel q0, q0, q1
62 ; CHECK-NEXT: vmov r0, r1, d1
63 ; CHECK-NEXT: vmov r2, r3, d0
64 ; CHECK-NEXT: adds r0, r0, r2
65 ; CHECK-NEXT: adcs r1, r3
68 %c = icmp eq <2 x i32> %b, zeroinitializer
69 %xx = zext <2 x i32> %x to <2 x i64>
70 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
71 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Two-lane signed variant: %x is sign-extended from <2 x i32> to <2 x i64>,
; lanes where %b != 0 are zeroed, and the two i64 lanes are summed into %z.
; The expected code performs the sign extension through scalar registers
; (asrs #31 produces the high words), builds the predicate with
; cmp/csetm/bfi + vmsr p0, selects with vpsel, and sums with adds/adcs.
75 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %b) {
76 ; CHECK-LABEL: add_v2i32_v2i64_sext:
77 ; CHECK: @ %bb.0: @ %entry
78 ; CHECK-NEXT: vmov r0, s2
79 ; CHECK-NEXT: vmov r1, s0
80 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
81 ; CHECK-NEXT: asrs r0, r0, #31
82 ; CHECK-NEXT: asrs r1, r1, #31
83 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
84 ; CHECK-NEXT: vmov r0, s4
85 ; CHECK-NEXT: movs r1, #0
86 ; CHECK-NEXT: cmp r0, #0
87 ; CHECK-NEXT: csetm r0, eq
88 ; CHECK-NEXT: bfi r1, r0, #0, #8
89 ; CHECK-NEXT: vmov r0, s6
90 ; CHECK-NEXT: vmov.i32 q1, #0x0
91 ; CHECK-NEXT: cmp r0, #0
92 ; CHECK-NEXT: csetm r0, eq
93 ; CHECK-NEXT: bfi r1, r0, #8, #8
94 ; CHECK-NEXT: vmsr p0, r1
95 ; CHECK-NEXT: vpsel q0, q0, q1
96 ; CHECK-NEXT: vmov r0, r1, d1
97 ; CHECK-NEXT: vmov r2, r3, d0
98 ; CHECK-NEXT: adds r0, r0, r2
99 ; CHECK-NEXT: adcs r1, r3
102 %c = icmp eq <2 x i32> %b, zeroinitializer
103 %xx = sext <2 x i32> %x to <2 x i64>
104 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
105 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated widening reduction: %x is zero-extended from <8 x i16> to
; <8 x i32>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i32 %z. The CHECK lines expect vpt.i16 eq plus a predicated
; vaddvt.u16, with no separate extend instruction.
109 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %b) {
110 ; CHECK-LABEL: add_v8i16_v8i32_zext:
111 ; CHECK: @ %bb.0: @ %entry
112 ; CHECK-NEXT: vpt.i16 eq, q1, zr
113 ; CHECK-NEXT: vaddvt.u16 r0, q0
116 %c = icmp eq <8 x i16> %b, zeroinitializer
117 %xx = zext <8 x i16> %x to <8 x i32>
118 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
119 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Signed counterpart of the <8 x i16> -> i32 predicated reduction: %x is
; sign-extended to <8 x i32> before the select on (%b == 0) and the add
; reduction. The CHECK lines expect vpt.i16 eq plus a predicated vaddvt.s16.
123 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %b) {
124 ; CHECK-LABEL: add_v8i16_v8i32_sext:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vpt.i16 eq, q1, zr
127 ; CHECK-NEXT: vaddvt.s16 r0, q0
130 %c = icmp eq <8 x i16> %b, zeroinitializer
131 %xx = sext <8 x i16> %x to <8 x i32>
132 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
133 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Narrow-vector variant: %x is zero-extended from <4 x i16> to <4 x i32>,
; lanes where %b != 0 are zeroed, and the lanes are summed into %z.
; The CHECK lines expect both the compared vector (q1) and the summed vector
; (q0) to be widened with vmovlb.u16 first, then vpt.i32 eq + vaddvt.u32.
137 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %b) {
138 ; CHECK-LABEL: add_v4i16_v4i32_zext:
139 ; CHECK: @ %bb.0: @ %entry
140 ; CHECK-NEXT: vmovlb.u16 q1, q1
141 ; CHECK-NEXT: vmovlb.u16 q0, q0
142 ; CHECK-NEXT: vpt.i32 eq, q1, zr
143 ; CHECK-NEXT: vaddvt.u32 r0, q0
146 %c = icmp eq <4 x i16> %b, zeroinitializer
147 %xx = zext <4 x i16> %x to <4 x i32>
148 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
149 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Signed narrow-vector variant: %x is sign-extended from <4 x i16> to
; <4 x i32>, lanes where %b != 0 are zeroed, and the lanes are summed into %z.
; The CHECK lines expect the compared vector (q1) to be widened with
; vmovlb.u16 and the summed vector (q0) with vmovlb.s16, followed by
; vpt.i32 eq + vaddvt.u32.
153 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %b) {
154 ; CHECK-LABEL: add_v4i16_v4i32_sext:
155 ; CHECK: @ %bb.0: @ %entry
156 ; CHECK-NEXT: vmovlb.u16 q1, q1
157 ; CHECK-NEXT: vmovlb.s16 q0, q0
158 ; CHECK-NEXT: vpt.i32 eq, q1, zr
159 ; CHECK-NEXT: vaddvt.u32 r0, q0
162 %c = icmp eq <4 x i16> %b, zeroinitializer
163 %xx = sext <4 x i16> %x to <4 x i32>
164 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
165 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Non-widening predicated reduction over <8 x i16>: lanes of %x where
; %b == 0 are kept, others are zeroed, and the lanes are summed into the
; i16 %z. The function is declared to return zeroext i16, and the CHECK
; lines expect vpt.i16 eq + vaddvt.u16 followed by uxth on r0.
169 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %b) {
170 ; CHECK-LABEL: add_v8i16_v8i16:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vpt.i16 eq, q1, zr
173 ; CHECK-NEXT: vaddvt.u16 r0, q0
174 ; CHECK-NEXT: uxth r0, r0
177 %c = icmp eq <8 x i16> %b, zeroinitializer
178 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
179 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated reduction with a 4x widening step: %x is zero-extended from
; <8 x i16> to <8 x i64>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i64 %z.
; The expected code uses no vaddv/vaddlv. It saves d8-d15, materialises the
; i16 compare result as an all-ones/zero vector with vpsel, re-expands that
; mask step by step (vcmp.i32 ne + vpsel) for each pair of 64-bit lanes,
; masks the extracted %x lanes with 0xffff, and accumulates into r0:r1.
; The first pair is combined with orrs/add; later pairs use adds/adcs.
183 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %b) {
184 ; CHECK-LABEL: add_v8i16_v8i64_zext:
185 ; CHECK: @ %bb.0: @ %entry
186 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
187 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
188 ; CHECK-NEXT: vmov.i8 q3, #0x0
189 ; CHECK-NEXT: vmov.i8 q4, #0xff
190 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
191 ; CHECK-NEXT: vpsel q5, q4, q3
192 ; CHECK-NEXT: vmov.u16 r0, q5[2]
193 ; CHECK-NEXT: vmov.u16 r1, q5[0]
194 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
195 ; CHECK-NEXT: vmov.u16 r0, q5[3]
196 ; CHECK-NEXT: vmov.u16 r1, q5[1]
197 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
198 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
199 ; CHECK-NEXT: vpsel q6, q4, q3
200 ; CHECK-NEXT: vmov r0, r1, d12
201 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
202 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
203 ; CHECK-NEXT: vmov.u16 r0, q0[1]
204 ; CHECK-NEXT: vmov.u16 r1, q0[0]
205 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
206 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
207 ; CHECK-NEXT: vmov.i64 q1, #0xffff
208 ; CHECK-NEXT: vand q7, q2, q1
209 ; CHECK-NEXT: vmov.i32 q2, #0x0
210 ; CHECK-NEXT: vpsel q7, q7, q2
211 ; CHECK-NEXT: vmov r0, r1, d15
212 ; CHECK-NEXT: vmov r2, r3, d14
213 ; CHECK-NEXT: orrs r1, r3
214 ; CHECK-NEXT: add r0, r2
215 ; CHECK-NEXT: vmov r2, r3, d13
216 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
217 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
218 ; CHECK-NEXT: vmov.u16 r2, q0[3]
219 ; CHECK-NEXT: vmov.u16 r3, q0[2]
220 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
221 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
222 ; CHECK-NEXT: vand q6, q6, q1
223 ; CHECK-NEXT: vpsel q6, q6, q2
224 ; CHECK-NEXT: vmov r2, r3, d12
225 ; CHECK-NEXT: adds r0, r0, r2
226 ; CHECK-NEXT: adcs r1, r3
227 ; CHECK-NEXT: vmov r2, r3, d13
228 ; CHECK-NEXT: adds r0, r0, r2
229 ; CHECK-NEXT: vmov.u16 r2, q5[6]
230 ; CHECK-NEXT: adcs r1, r3
231 ; CHECK-NEXT: vmov.u16 r3, q5[4]
232 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
233 ; CHECK-NEXT: vmov.u16 r2, q5[7]
234 ; CHECK-NEXT: vmov.u16 r3, q5[5]
235 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
236 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
237 ; CHECK-NEXT: vpsel q3, q4, q3
238 ; CHECK-NEXT: vmov r2, r3, d6
239 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
240 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r3
241 ; CHECK-NEXT: vmov.u16 r2, q0[5]
242 ; CHECK-NEXT: vmov.u16 r3, q0[4]
243 ; CHECK-NEXT: vcmp.i32 ne, q4, zr
244 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
245 ; CHECK-NEXT: vand q4, q4, q1
246 ; CHECK-NEXT: vpsel q4, q4, q2
247 ; CHECK-NEXT: vmov r2, r3, d8
248 ; CHECK-NEXT: adds r0, r0, r2
249 ; CHECK-NEXT: adcs r1, r3
250 ; CHECK-NEXT: vmov r2, r3, d9
251 ; CHECK-NEXT: adds r0, r0, r2
252 ; CHECK-NEXT: adcs r1, r3
253 ; CHECK-NEXT: vmov r2, r3, d7
254 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
255 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
256 ; CHECK-NEXT: vmov.u16 r2, q0[7]
257 ; CHECK-NEXT: vmov.u16 r3, q0[6]
258 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
259 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
260 ; CHECK-NEXT: vand q0, q0, q1
261 ; CHECK-NEXT: vpsel q0, q0, q2
262 ; CHECK-NEXT: vmov r2, r3, d0
263 ; CHECK-NEXT: adds r0, r0, r2
264 ; CHECK-NEXT: adcs r1, r3
265 ; CHECK-NEXT: vmov r2, r3, d1
266 ; CHECK-NEXT: adds r0, r0, r2
267 ; CHECK-NEXT: adcs r1, r3
268 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
271 %c = icmp eq <8 x i16> %b, zeroinitializer
272 %xx = zext <8 x i16> %x to <8 x i64>
273 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
274 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Signed counterpart of the <8 x i16> -> i64 predicated reduction: %x is
; sign-extended to <8 x i64>, lanes where %b != 0 are zeroed, and the lanes
; are summed into the i64 %z.
; The expected code uses no vaddv/vaddlv. It saves d8-d13, expands the i16
; compare mask step by step with vcmp/vpsel, extracts each %x lane with
; vmov.s16, forms the high word with asrs #31, selects against zero with
; vpsel, and accumulates into r0:r1 with adds/adcs.
278 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
279 ; CHECK-LABEL: add_v8i16_v8i64_sext:
280 ; CHECK: @ %bb.0: @ %entry
281 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
282 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
283 ; CHECK-NEXT: vmov.i8 q2, #0x0
284 ; CHECK-NEXT: vmov.i8 q3, #0xff
285 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
286 ; CHECK-NEXT: vpsel q4, q3, q2
287 ; CHECK-NEXT: vmov.u16 r0, q4[2]
288 ; CHECK-NEXT: vmov.u16 r1, q4[0]
289 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
290 ; CHECK-NEXT: vmov.u16 r0, q4[3]
291 ; CHECK-NEXT: vmov.u16 r1, q4[1]
292 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
293 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
294 ; CHECK-NEXT: vpsel q5, q3, q2
295 ; CHECK-NEXT: vmov r0, r1, d10
296 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
297 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
298 ; CHECK-NEXT: vmov.s16 r0, q0[1]
299 ; CHECK-NEXT: vmov.s16 r1, q0[0]
300 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
301 ; CHECK-NEXT: vmov q6[2], q6[0], r1, r0
302 ; CHECK-NEXT: asrs r0, r0, #31
303 ; CHECK-NEXT: asrs r1, r1, #31
304 ; CHECK-NEXT: vmov.i32 q1, #0x0
305 ; CHECK-NEXT: vmov q6[3], q6[1], r1, r0
306 ; CHECK-NEXT: vpsel q6, q6, q1
307 ; CHECK-NEXT: vmov r0, r1, d13
308 ; CHECK-NEXT: vmov r2, r3, d12
309 ; CHECK-NEXT: adds r0, r0, r2
310 ; CHECK-NEXT: adcs r1, r3
311 ; CHECK-NEXT: vmov r2, r3, d11
312 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
313 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
314 ; CHECK-NEXT: vmov.s16 r2, q0[3]
315 ; CHECK-NEXT: vmov.s16 r3, q0[2]
316 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
317 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
318 ; CHECK-NEXT: asrs r2, r2, #31
319 ; CHECK-NEXT: asrs r3, r3, #31
320 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
321 ; CHECK-NEXT: vpsel q5, q5, q1
322 ; CHECK-NEXT: vmov r2, r3, d10
323 ; CHECK-NEXT: adds r0, r0, r2
324 ; CHECK-NEXT: adcs r1, r3
325 ; CHECK-NEXT: vmov r2, r3, d11
326 ; CHECK-NEXT: adds r0, r0, r2
327 ; CHECK-NEXT: vmov.u16 r2, q4[6]
328 ; CHECK-NEXT: adcs r1, r3
329 ; CHECK-NEXT: vmov.u16 r3, q4[4]
330 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
331 ; CHECK-NEXT: vmov.u16 r2, q4[7]
332 ; CHECK-NEXT: vmov.u16 r3, q4[5]
333 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
334 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
335 ; CHECK-NEXT: vpsel q2, q3, q2
336 ; CHECK-NEXT: vmov r2, r3, d4
337 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
338 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
339 ; CHECK-NEXT: vmov.s16 r2, q0[5]
340 ; CHECK-NEXT: vmov.s16 r3, q0[4]
341 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
342 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
343 ; CHECK-NEXT: asrs r2, r2, #31
344 ; CHECK-NEXT: asrs r3, r3, #31
345 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
346 ; CHECK-NEXT: vpsel q3, q3, q1
347 ; CHECK-NEXT: vmov r2, r3, d6
348 ; CHECK-NEXT: adds r0, r0, r2
349 ; CHECK-NEXT: adcs r1, r3
350 ; CHECK-NEXT: vmov r2, r3, d7
351 ; CHECK-NEXT: adds r0, r0, r2
352 ; CHECK-NEXT: adcs r1, r3
353 ; CHECK-NEXT: vmov r2, r3, d5
354 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
355 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
356 ; CHECK-NEXT: vmov.s16 r2, q0[7]
357 ; CHECK-NEXT: vmov.s16 r3, q0[6]
358 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
359 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
360 ; CHECK-NEXT: asrs r2, r2, #31
361 ; CHECK-NEXT: asrs r3, r3, #31
362 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
363 ; CHECK-NEXT: vpsel q0, q0, q1
364 ; CHECK-NEXT: vmov r2, r3, d0
365 ; CHECK-NEXT: adds r0, r0, r2
366 ; CHECK-NEXT: adcs r1, r3
367 ; CHECK-NEXT: vmov r2, r3, d1
368 ; CHECK-NEXT: adds r0, r0, r2
369 ; CHECK-NEXT: adcs r1, r3
370 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
373 %c = icmp eq <8 x i16> %b, zeroinitializer
374 %xx = sext <8 x i16> %x to <8 x i64>
375 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
376 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Narrow-vector long reduction: %x is zero-extended from <4 x i16> to
; <4 x i64>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i64 %z. The CHECK lines expect both vectors to be widened with
; vmovlb.u16, then vpt.i32 eq + a predicated vaddlvt.u32 into r0/r1.
380 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %b) {
381 ; CHECK-LABEL: add_v4i16_v4i64_zext:
382 ; CHECK: @ %bb.0: @ %entry
383 ; CHECK-NEXT: vmovlb.u16 q1, q1
384 ; CHECK-NEXT: vmovlb.u16 q0, q0
385 ; CHECK-NEXT: vpt.i32 eq, q1, zr
386 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
389 %c = icmp eq <4 x i16> %b, zeroinitializer
390 %xx = zext <4 x i16> %x to <4 x i64>
391 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
392 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Signed narrow-vector long reduction: %x is sign-extended from <4 x i16> to
; <4 x i64>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i64 %z. The CHECK lines expect the compared vector (q1) to be widened
; with vmovlb.u16 and the summed vector (q0) with vmovlb.s16, then
; vpt.i32 eq + a predicated vaddlvt.s32 into r0/r1.
396 define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %b) {
397 ; CHECK-LABEL: add_v4i16_v4i64_sext:
398 ; CHECK: @ %bb.0: @ %entry
399 ; CHECK-NEXT: vmovlb.u16 q1, q1
400 ; CHECK-NEXT: vmovlb.s16 q0, q0
401 ; CHECK-NEXT: vpt.i32 eq, q1, zr
402 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
405 %c = icmp eq <4 x i16> %b, zeroinitializer
406 %xx = sext <4 x i16> %x to <4 x i64>
407 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
408 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Two-lane variant from i16: %x is zero-extended from <2 x i16> to
; <2 x i64>, lanes where %b != 0 are zeroed, and the two lanes are summed
; into the i64 %z.
; The expected code masks both q1 and q0 with 0xffff per 64-bit element,
; builds the predicate in r1 with cmp/csetm/bfi, moves it to p0 with vmsr,
; selects with vpsel, and combines the two lanes with add (low words) and
; orrs (high words) instead of adds/adcs.
412 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
413 ; CHECK-LABEL: add_v2i16_v2i64_zext:
414 ; CHECK: @ %bb.0: @ %entry
415 ; CHECK-NEXT: vmov.i64 q2, #0xffff
416 ; CHECK-NEXT: movs r1, #0
417 ; CHECK-NEXT: vand q1, q1, q2
418 ; CHECK-NEXT: vand q0, q0, q2
419 ; CHECK-NEXT: vmov r0, s4
420 ; CHECK-NEXT: cmp r0, #0
421 ; CHECK-NEXT: csetm r0, eq
422 ; CHECK-NEXT: bfi r1, r0, #0, #8
423 ; CHECK-NEXT: vmov r0, s6
424 ; CHECK-NEXT: vmov.i32 q1, #0x0
425 ; CHECK-NEXT: cmp r0, #0
426 ; CHECK-NEXT: csetm r0, eq
427 ; CHECK-NEXT: bfi r1, r0, #8, #8
428 ; CHECK-NEXT: vmsr p0, r1
429 ; CHECK-NEXT: vpsel q0, q0, q1
430 ; CHECK-NEXT: vmov r0, r1, d1
431 ; CHECK-NEXT: vmov r2, r3, d0
432 ; CHECK-NEXT: add r0, r2
433 ; CHECK-NEXT: orrs r1, r3
436 %c = icmp eq <2 x i16> %b, zeroinitializer
437 %xx = zext <2 x i16> %x to <2 x i64>
438 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
439 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Signed two-lane variant from i16: %x is sign-extended from <2 x i16> to
; <2 x i64>, lanes where %b != 0 are zeroed, and the two lanes are summed
; into the i64 %z.
; The expected code masks q1 with a vmov.i32 #0xffff constant before the
; scalar compares, builds the predicate with cmp/csetm/bfi + vmsr p0,
; sign-extends the two q0 lanes in scalar registers (sxth, then asrs #31 for
; the high words), selects with vpsel, and sums with adds/adcs.
443 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %b) {
444 ; CHECK-LABEL: add_v2i16_v2i64_sext:
445 ; CHECK: @ %bb.0: @ %entry
446 ; CHECK-NEXT: vmov.i32 q2, #0xffff
447 ; CHECK-NEXT: movs r1, #0
448 ; CHECK-NEXT: vand q1, q1, q2
449 ; CHECK-NEXT: vmov r0, s4
450 ; CHECK-NEXT: cmp r0, #0
451 ; CHECK-NEXT: csetm r0, eq
452 ; CHECK-NEXT: bfi r1, r0, #0, #8
453 ; CHECK-NEXT: vmov r0, s6
454 ; CHECK-NEXT: vmov.i32 q1, #0x0
455 ; CHECK-NEXT: cmp r0, #0
456 ; CHECK-NEXT: csetm r0, eq
457 ; CHECK-NEXT: bfi r1, r0, #8, #8
458 ; CHECK-NEXT: vmov r0, s2
459 ; CHECK-NEXT: vmsr p0, r1
460 ; CHECK-NEXT: vmov r1, s0
461 ; CHECK-NEXT: sxth r0, r0
462 ; CHECK-NEXT: sxth r1, r1
463 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
464 ; CHECK-NEXT: asrs r0, r0, #31
465 ; CHECK-NEXT: asrs r1, r1, #31
466 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
467 ; CHECK-NEXT: vpsel q0, q0, q1
468 ; CHECK-NEXT: vmov r0, r1, d1
469 ; CHECK-NEXT: vmov r2, r3, d0
470 ; CHECK-NEXT: adds r0, r0, r2
471 ; CHECK-NEXT: adcs r1, r3
474 %c = icmp eq <2 x i16> %b, zeroinitializer
475 %xx = sext <2 x i16> %x to <2 x i64>
476 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
477 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated widening reduction from bytes: %x is zero-extended from
; <16 x i8> to <16 x i32>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i32 %z. The CHECK lines expect vpt.i8 eq plus a predicated
; vaddvt.u8, with no separate extend instruction.
481 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %b) {
482 ; CHECK-LABEL: add_v16i8_v16i32_zext:
483 ; CHECK: @ %bb.0: @ %entry
484 ; CHECK-NEXT: vpt.i8 eq, q1, zr
485 ; CHECK-NEXT: vaddvt.u8 r0, q0
488 %c = icmp eq <16 x i8> %b, zeroinitializer
489 %xx = zext <16 x i8> %x to <16 x i32>
490 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
491 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Signed counterpart of the <16 x i8> -> i32 predicated reduction: %x is
; sign-extended to <16 x i32> before the select on (%b == 0) and the add
; reduction. The CHECK lines expect vpt.i8 eq plus a predicated vaddvt.s8.
495 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %b) {
496 ; CHECK-LABEL: add_v16i8_v16i32_sext:
497 ; CHECK: @ %bb.0: @ %entry
498 ; CHECK-NEXT: vpt.i8 eq, q1, zr
499 ; CHECK-NEXT: vaddvt.s8 r0, q0
502 %c = icmp eq <16 x i8> %b, zeroinitializer
503 %xx = sext <16 x i8> %x to <16 x i32>
504 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
505 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Narrow-vector variant from bytes: %x is zero-extended from <8 x i8> to
; <8 x i32>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i32 %z. The CHECK lines expect both vectors to be widened with
; vmovlb.u8, then vpt.i16 eq + a predicated vaddvt.u16.
509 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %b) {
510 ; CHECK-LABEL: add_v8i8_v8i32_zext:
511 ; CHECK: @ %bb.0: @ %entry
512 ; CHECK-NEXT: vmovlb.u8 q1, q1
513 ; CHECK-NEXT: vmovlb.u8 q0, q0
514 ; CHECK-NEXT: vpt.i16 eq, q1, zr
515 ; CHECK-NEXT: vaddvt.u16 r0, q0
518 %c = icmp eq <8 x i8> %b, zeroinitializer
519 %xx = zext <8 x i8> %x to <8 x i32>
520 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
521 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Signed narrow-vector variant from bytes: %x is sign-extended from
; <8 x i8> to <8 x i32>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i32 %z. The CHECK lines expect the compared vector (q1) to
; be widened with vmovlb.u8 and the summed vector (q0) with vmovlb.s8, then
; vpt.i16 eq + a predicated vaddvt.s16.
525 define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %b) {
526 ; CHECK-LABEL: add_v8i8_v8i32_sext:
527 ; CHECK: @ %bb.0: @ %entry
528 ; CHECK-NEXT: vmovlb.u8 q1, q1
529 ; CHECK-NEXT: vmovlb.s8 q0, q0
530 ; CHECK-NEXT: vpt.i16 eq, q1, zr
531 ; CHECK-NEXT: vaddvt.s16 r0, q0
534 %c = icmp eq <8 x i8> %b, zeroinitializer
535 %xx = sext <8 x i8> %x to <8 x i32>
536 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
537 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Four-lane variant from bytes: %x is zero-extended from <4 x i8> to
; <4 x i32>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i32 %z. The CHECK lines expect both vectors to be masked with a
; vmov.i32 #0xff constant (vand), then vpt.i32 eq + a predicated vaddvt.u32.
541 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %b) {
542 ; CHECK-LABEL: add_v4i8_v4i32_zext:
543 ; CHECK: @ %bb.0: @ %entry
544 ; CHECK-NEXT: vmov.i32 q2, #0xff
545 ; CHECK-NEXT: vand q1, q1, q2
546 ; CHECK-NEXT: vand q0, q0, q2
547 ; CHECK-NEXT: vpt.i32 eq, q1, zr
548 ; CHECK-NEXT: vaddvt.u32 r0, q0
551 %c = icmp eq <4 x i8> %b, zeroinitializer
552 %xx = zext <4 x i8> %x to <4 x i32>
553 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
554 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Signed four-lane variant from bytes: %x is sign-extended from <4 x i8> to
; <4 x i32>, lanes where %b != 0 are zeroed, and the lanes are summed into
; the i32 %z. The CHECK lines expect the compared vector (q1) to be masked
; with #0xff and the summed vector (q0) to be sign-extended in two steps
; (vmovlb.s8 then vmovlb.s16), followed by vpt.i32 eq + vaddvt.u32.
558 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %b) {
559 ; CHECK-LABEL: add_v4i8_v4i32_sext:
560 ; CHECK: @ %bb.0: @ %entry
561 ; CHECK-NEXT: vmov.i32 q2, #0xff
562 ; CHECK-NEXT: vmovlb.s8 q0, q0
563 ; CHECK-NEXT: vand q1, q1, q2
564 ; CHECK-NEXT: vmovlb.s16 q0, q0
565 ; CHECK-NEXT: vpt.i32 eq, q1, zr
566 ; CHECK-NEXT: vaddvt.u32 r0, q0
569 %c = icmp eq <4 x i8> %b, zeroinitializer
570 %xx = sext <4 x i8> %x to <4 x i32>
571 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
572 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated reduction from bytes to i16: %x is zero-extended from
; <16 x i8> to <16 x i16>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i16 %z. The function is declared to return zeroext i16,
; and the CHECK lines expect vpt.i8 eq + vaddvt.u8 followed by uxth on r0.
576 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %b) {
577 ; CHECK-LABEL: add_v16i8_v16i16_zext:
578 ; CHECK: @ %bb.0: @ %entry
579 ; CHECK-NEXT: vpt.i8 eq, q1, zr
580 ; CHECK-NEXT: vaddvt.u8 r0, q0
581 ; CHECK-NEXT: uxth r0, r0
584 %c = icmp eq <16 x i8> %b, zeroinitializer
585 %xx = zext <16 x i8> %x to <16 x i16>
586 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
587 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Signed predicated reduction from bytes to i16: %x is sign-extended from
; <16 x i8> to <16 x i16>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i16 %z. The function is declared to return signext i16,
; and the CHECK lines expect vpt.i8 eq + vaddvt.s8 followed by sxth on r0.
591 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %b) {
592 ; CHECK-LABEL: add_v16i8_v16i16_sext:
593 ; CHECK: @ %bb.0: @ %entry
594 ; CHECK-NEXT: vpt.i8 eq, q1, zr
595 ; CHECK-NEXT: vaddvt.s8 r0, q0
596 ; CHECK-NEXT: sxth r0, r0
599 %c = icmp eq <16 x i8> %b, zeroinitializer
600 %xx = sext <16 x i8> %x to <16 x i16>
601 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
602 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Narrow-vector reduction from bytes to i16: %x is zero-extended from
; <8 x i8> to <8 x i16>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i16 %z (declared return: zeroext i16). The CHECK lines
; expect both vectors to be widened with vmovlb.u8, then vpt.i16 eq +
; vaddvt.u16, followed by uxth on r0.
606 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %b) {
607 ; CHECK-LABEL: add_v8i8_v8i16_zext:
608 ; CHECK: @ %bb.0: @ %entry
609 ; CHECK-NEXT: vmovlb.u8 q1, q1
610 ; CHECK-NEXT: vmovlb.u8 q0, q0
611 ; CHECK-NEXT: vpt.i16 eq, q1, zr
612 ; CHECK-NEXT: vaddvt.u16 r0, q0
613 ; CHECK-NEXT: uxth r0, r0
616 %c = icmp eq <8 x i8> %b, zeroinitializer
617 %xx = zext <8 x i8> %x to <8 x i16>
618 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
619 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Signed narrow-vector reduction from bytes to i16: %x is sign-extended from
; <8 x i8> to <8 x i16>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i16 %z (declared return: signext i16). The CHECK lines
; expect the compared vector (q1) to be widened with vmovlb.u8 and the summed
; vector (q0) with vmovlb.s8, then vpt.i16 eq + vaddvt.u16, followed by sxth
; on r0.
623 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %b) {
624 ; CHECK-LABEL: add_v8i8_v8i16_sext:
625 ; CHECK: @ %bb.0: @ %entry
626 ; CHECK-NEXT: vmovlb.u8 q1, q1
627 ; CHECK-NEXT: vmovlb.s8 q0, q0
628 ; CHECK-NEXT: vpt.i16 eq, q1, zr
629 ; CHECK-NEXT: vaddvt.u16 r0, q0
630 ; CHECK-NEXT: sxth r0, r0
633 %c = icmp eq <8 x i8> %b, zeroinitializer
634 %xx = sext <8 x i8> %x to <8 x i16>
635 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
636 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Non-widening predicated reduction over <16 x i8>: lanes of %x where
; %b == 0 are kept, others are zeroed, and the lanes are summed into the
; i8 %z. The function is declared to return zeroext i8, and the CHECK lines
; expect vpt.i8 eq + vaddvt.u8 followed by uxtb on r0.
640 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %b) {
641 ; CHECK-LABEL: add_v16i8_v16i8:
642 ; CHECK: @ %bb.0: @ %entry
643 ; CHECK-NEXT: vpt.i8 eq, q1, zr
644 ; CHECK-NEXT: vaddvt.u8 r0, q0
645 ; CHECK-NEXT: uxtb r0, r0
648 %c = icmp eq <16 x i8> %b, zeroinitializer
649 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
650 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
; Predicated reduction with an 8x widening step: %x is zero-extended from
; <16 x i8> to <16 x i64>, lanes where %b != 0 are zeroed, and the lanes are
; summed into the i64 %z.
; The expected code uses no vaddv/vaddlv. It saves d8-d15 and reserves 16
; bytes of stack, spilling the all-zero vector to [sp] and later reloading it
; into q7. The i8 compare result is materialised with vpsel and re-expanded
; i8 -> i16 -> i32 through vcmp ne / vpsel chains, once for bytes 0-7 and
; once for bytes 8-15. For each pair of 64-bit lanes the %x bytes are
; extracted with vmov.u8, masked with 0xff, selected against zero, and
; accumulated into r0:r1. The first pair is combined with orrs/add; later
; pairs use adds/adcs.
654 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %b) {
655 ; CHECK-LABEL: add_v16i8_v16i64_zext:
656 ; CHECK: @ %bb.0: @ %entry
657 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
658 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
659 ; CHECK-NEXT: .pad #16
660 ; CHECK-NEXT: sub sp, #16
661 ; CHECK-NEXT: vmov q2, q0
662 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
663 ; CHECK-NEXT: vmov.i8 q0, #0x0
664 ; CHECK-NEXT: vmov.i8 q1, #0xff
665 ; CHECK-NEXT: vpsel q5, q1, q0
666 ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
667 ; CHECK-NEXT: vmov.u8 r0, q5[0]
668 ; CHECK-NEXT: vmov.16 q3[0], r0
669 ; CHECK-NEXT: vmov.u8 r0, q5[1]
670 ; CHECK-NEXT: vmov.16 q3[1], r0
671 ; CHECK-NEXT: vmov.u8 r0, q5[2]
672 ; CHECK-NEXT: vmov.16 q3[2], r0
673 ; CHECK-NEXT: vmov.u8 r0, q5[3]
674 ; CHECK-NEXT: vmov.16 q3[3], r0
675 ; CHECK-NEXT: vmov.u8 r0, q5[4]
676 ; CHECK-NEXT: vmov.16 q3[4], r0
677 ; CHECK-NEXT: vmov.u8 r0, q5[5]
678 ; CHECK-NEXT: vmov.16 q3[5], r0
679 ; CHECK-NEXT: vmov.u8 r0, q5[6]
680 ; CHECK-NEXT: vmov.16 q3[6], r0
681 ; CHECK-NEXT: vmov.u8 r0, q5[7]
682 ; CHECK-NEXT: vmov.16 q3[7], r0
683 ; CHECK-NEXT: vcmp.i16 ne, q3, zr
684 ; CHECK-NEXT: vpsel q6, q1, q0
685 ; CHECK-NEXT: vmov.u16 r0, q6[2]
686 ; CHECK-NEXT: vmov.u16 r1, q6[0]
687 ; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
688 ; CHECK-NEXT: vmov.u16 r0, q6[3]
689 ; CHECK-NEXT: vmov.u16 r1, q6[1]
690 ; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
691 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
692 ; CHECK-NEXT: vpsel q7, q1, q0
693 ; CHECK-NEXT: vmov r0, r1, d14
694 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r1
695 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r1
696 ; CHECK-NEXT: vmov.u8 r0, q2[1]
697 ; CHECK-NEXT: vmov.u8 r1, q2[0]
698 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
699 ; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
700 ; CHECK-NEXT: vmov.i64 q3, #0xff
701 ; CHECK-NEXT: vand q0, q4, q3
702 ; CHECK-NEXT: vmov.i32 q4, #0x0
703 ; CHECK-NEXT: vpsel q0, q0, q4
704 ; CHECK-NEXT: vmov r0, r1, d1
705 ; CHECK-NEXT: vmov r2, r3, d0
706 ; CHECK-NEXT: orrs r1, r3
707 ; CHECK-NEXT: add r0, r2
708 ; CHECK-NEXT: vmov r2, r3, d15
709 ; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload
710 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
711 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
712 ; CHECK-NEXT: vmov.u8 r2, q2[3]
713 ; CHECK-NEXT: vmov.u8 r3, q2[2]
714 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
715 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
716 ; CHECK-NEXT: vand q0, q0, q3
717 ; CHECK-NEXT: vpsel q0, q0, q4
718 ; CHECK-NEXT: vmov r2, r3, d0
719 ; CHECK-NEXT: adds r0, r0, r2
720 ; CHECK-NEXT: adcs r1, r3
721 ; CHECK-NEXT: vmov r2, r3, d1
722 ; CHECK-NEXT: adds r0, r0, r2
723 ; CHECK-NEXT: vmov.u16 r2, q6[6]
724 ; CHECK-NEXT: adcs r1, r3
725 ; CHECK-NEXT: vmov.u16 r3, q6[4]
726 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
727 ; CHECK-NEXT: vmov.u16 r2, q6[7]
728 ; CHECK-NEXT: vmov.u16 r3, q6[5]
729 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
730 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
731 ; CHECK-NEXT: vpsel q6, q1, q7
732 ; CHECK-NEXT: vmov r2, r3, d12
733 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
734 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
735 ; CHECK-NEXT: vmov.u8 r2, q2[5]
736 ; CHECK-NEXT: vmov.u8 r3, q2[4]
737 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
738 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
739 ; CHECK-NEXT: vand q0, q0, q3
740 ; CHECK-NEXT: vpsel q0, q0, q4
741 ; CHECK-NEXT: vmov r2, r3, d0
742 ; CHECK-NEXT: adds r0, r0, r2
743 ; CHECK-NEXT: adcs r1, r3
744 ; CHECK-NEXT: vmov r2, r3, d1
745 ; CHECK-NEXT: adds r0, r0, r2
746 ; CHECK-NEXT: adcs r1, r3
747 ; CHECK-NEXT: vmov r2, r3, d13
748 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
749 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
750 ; CHECK-NEXT: vmov.u8 r2, q2[7]
751 ; CHECK-NEXT: vmov.u8 r3, q2[6]
752 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
753 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
754 ; CHECK-NEXT: vand q0, q0, q3
755 ; CHECK-NEXT: vpsel q0, q0, q4
756 ; CHECK-NEXT: vmov r2, r3, d0
757 ; CHECK-NEXT: adds r0, r0, r2
758 ; CHECK-NEXT: adcs r1, r3
759 ; CHECK-NEXT: vmov r2, r3, d1
760 ; CHECK-NEXT: adds r0, r0, r2
761 ; CHECK-NEXT: vmov.u8 r2, q5[8]
762 ; CHECK-NEXT: vmov.16 q6[0], r2
763 ; CHECK-NEXT: vmov.u8 r2, q5[9]
764 ; CHECK-NEXT: vmov.16 q6[1], r2
765 ; CHECK-NEXT: vmov.u8 r2, q5[10]
766 ; CHECK-NEXT: vmov.16 q6[2], r2
767 ; CHECK-NEXT: vmov.u8 r2, q5[11]
768 ; CHECK-NEXT: vmov.16 q6[3], r2
769 ; CHECK-NEXT: vmov.u8 r2, q5[12]
770 ; CHECK-NEXT: vmov.16 q6[4], r2
771 ; CHECK-NEXT: vmov.u8 r2, q5[13]
772 ; CHECK-NEXT: vmov.16 q6[5], r2
773 ; CHECK-NEXT: vmov.u8 r2, q5[14]
774 ; CHECK-NEXT: vmov.16 q6[6], r2
775 ; CHECK-NEXT: vmov.u8 r2, q5[15]
776 ; CHECK-NEXT: vmov.16 q6[7], r2
777 ; CHECK-NEXT: adcs r1, r3
778 ; CHECK-NEXT: vcmp.i16 ne, q6, zr
779 ; CHECK-NEXT: vpsel q5, q1, q7
780 ; CHECK-NEXT: vmov.u16 r2, q5[2]
781 ; CHECK-NEXT: vmov.u16 r3, q5[0]
782 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
783 ; CHECK-NEXT: vmov.u16 r2, q5[3]
784 ; CHECK-NEXT: vmov.u16 r3, q5[1]
785 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
786 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
787 ; CHECK-NEXT: vpsel q6, q1, q7
788 ; CHECK-NEXT: vmov r2, r3, d12
789 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
790 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
791 ; CHECK-NEXT: vmov.u8 r2, q2[9]
792 ; CHECK-NEXT: vmov.u8 r3, q2[8]
793 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
794 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
795 ; CHECK-NEXT: vand q0, q0, q3
796 ; CHECK-NEXT: vpsel q0, q0, q4
797 ; CHECK-NEXT: vmov r2, r3, d0
798 ; CHECK-NEXT: adds r0, r0, r2
799 ; CHECK-NEXT: adcs r1, r3
800 ; CHECK-NEXT: vmov r2, r3, d1
801 ; CHECK-NEXT: adds r0, r0, r2
802 ; CHECK-NEXT: adcs r1, r3
803 ; CHECK-NEXT: vmov r2, r3, d13
804 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
805 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
806 ; CHECK-NEXT: vmov.u8 r2, q2[11]
807 ; CHECK-NEXT: vmov.u8 r3, q2[10]
808 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
809 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
810 ; CHECK-NEXT: vand q0, q0, q3
811 ; CHECK-NEXT: vpsel q0, q0, q4
812 ; CHECK-NEXT: vmov r2, r3, d0
813 ; CHECK-NEXT: adds r0, r0, r2
814 ; CHECK-NEXT: adcs r1, r3
815 ; CHECK-NEXT: vmov r2, r3, d1
816 ; CHECK-NEXT: adds r0, r0, r2
817 ; CHECK-NEXT: vmov.u16 r2, q5[6]
818 ; CHECK-NEXT: adcs r1, r3
819 ; CHECK-NEXT: vmov.u16 r3, q5[4]
820 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
821 ; CHECK-NEXT: vmov.u16 r2, q5[7]
822 ; CHECK-NEXT: vmov.u16 r3, q5[5]
823 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
824 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
825 ; CHECK-NEXT: vpsel q1, q1, q7
826 ; CHECK-NEXT: vmov r2, r3, d2
827 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
828 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
829 ; CHECK-NEXT: vmov.u8 r2, q2[13]
830 ; CHECK-NEXT: vmov.u8 r3, q2[12]
831 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
832 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
833 ; CHECK-NEXT: vand q0, q0, q3
834 ; CHECK-NEXT: vpsel q0, q0, q4
835 ; CHECK-NEXT: vmov r2, r3, d0
836 ; CHECK-NEXT: adds r0, r0, r2
837 ; CHECK-NEXT: adcs r1, r3
838 ; CHECK-NEXT: vmov r2, r3, d1
839 ; CHECK-NEXT: adds r0, r0, r2
840 ; CHECK-NEXT: adcs r1, r3
841 ; CHECK-NEXT: vmov r2, r3, d3
842 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
843 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
844 ; CHECK-NEXT: vmov.u8 r2, q2[15]
845 ; CHECK-NEXT: vmov.u8 r3, q2[14]
846 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
847 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
848 ; CHECK-NEXT: vand q0, q0, q3
849 ; CHECK-NEXT: vpsel q0, q0, q4
850 ; CHECK-NEXT: vmov r2, r3, d0
851 ; CHECK-NEXT: adds r0, r0, r2
852 ; CHECK-NEXT: adcs r1, r3
853 ; CHECK-NEXT: vmov r2, r3, d1
854 ; CHECK-NEXT: adds r0, r0, r2
855 ; CHECK-NEXT: adcs r1, r3
856 ; CHECK-NEXT: add sp, #16
857 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
860 %c = icmp eq <16 x i8> %b, zeroinitializer
861 %xx = zext <16 x i8> %x to <16 x i64>
862 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
863 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
867 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
868 ; CHECK-LABEL: add_v16i8_v16i64_sext:
869 ; CHECK: @ %bb.0: @ %entry
870 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
871 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
872 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
873 ; CHECK-NEXT: vmov.i8 q1, #0x0
874 ; CHECK-NEXT: vmov.i8 q2, #0xff
875 ; CHECK-NEXT: vpsel q4, q2, q1
876 ; CHECK-NEXT: vmov.u8 r0, q4[0]
877 ; CHECK-NEXT: vmov.16 q3[0], r0
878 ; CHECK-NEXT: vmov.u8 r0, q4[1]
879 ; CHECK-NEXT: vmov.16 q3[1], r0
880 ; CHECK-NEXT: vmov.u8 r0, q4[2]
881 ; CHECK-NEXT: vmov.16 q3[2], r0
882 ; CHECK-NEXT: vmov.u8 r0, q4[3]
883 ; CHECK-NEXT: vmov.16 q3[3], r0
884 ; CHECK-NEXT: vmov.u8 r0, q4[4]
885 ; CHECK-NEXT: vmov.16 q3[4], r0
886 ; CHECK-NEXT: vmov.u8 r0, q4[5]
887 ; CHECK-NEXT: vmov.16 q3[5], r0
888 ; CHECK-NEXT: vmov.u8 r0, q4[6]
889 ; CHECK-NEXT: vmov.16 q3[6], r0
890 ; CHECK-NEXT: vmov.u8 r0, q4[7]
891 ; CHECK-NEXT: vmov.16 q3[7], r0
892 ; CHECK-NEXT: vcmp.i16 ne, q3, zr
893 ; CHECK-NEXT: vpsel q5, q2, q1
894 ; CHECK-NEXT: vmov.u16 r0, q5[2]
895 ; CHECK-NEXT: vmov.u16 r1, q5[0]
896 ; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
897 ; CHECK-NEXT: vmov.u16 r0, q5[3]
898 ; CHECK-NEXT: vmov.u16 r1, q5[1]
899 ; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
900 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
901 ; CHECK-NEXT: vpsel q6, q2, q1
902 ; CHECK-NEXT: vmov r0, r1, d12
903 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r1
904 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r1
905 ; CHECK-NEXT: vmov.s8 r0, q0[1]
906 ; CHECK-NEXT: vmov.s8 r1, q0[0]
907 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
908 ; CHECK-NEXT: vmov q7[2], q7[0], r1, r0
909 ; CHECK-NEXT: asrs r0, r0, #31
910 ; CHECK-NEXT: asrs r1, r1, #31
911 ; CHECK-NEXT: vmov.i32 q3, #0x0
912 ; CHECK-NEXT: vmov q7[3], q7[1], r1, r0
913 ; CHECK-NEXT: vpsel q7, q7, q3
914 ; CHECK-NEXT: vmov r0, r1, d15
915 ; CHECK-NEXT: vmov r2, r3, d14
916 ; CHECK-NEXT: adds r0, r0, r2
917 ; CHECK-NEXT: adcs r1, r3
918 ; CHECK-NEXT: vmov r2, r3, d13
919 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
920 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
921 ; CHECK-NEXT: vmov.s8 r2, q0[3]
922 ; CHECK-NEXT: vmov.s8 r3, q0[2]
923 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
924 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
925 ; CHECK-NEXT: asrs r2, r2, #31
926 ; CHECK-NEXT: asrs r3, r3, #31
927 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
928 ; CHECK-NEXT: vpsel q6, q6, q3
929 ; CHECK-NEXT: vmov r2, r3, d12
930 ; CHECK-NEXT: adds r0, r0, r2
931 ; CHECK-NEXT: adcs r1, r3
932 ; CHECK-NEXT: vmov r2, r3, d13
933 ; CHECK-NEXT: adds r0, r0, r2
934 ; CHECK-NEXT: vmov.u16 r2, q5[6]
935 ; CHECK-NEXT: adcs r1, r3
936 ; CHECK-NEXT: vmov.u16 r3, q5[4]
937 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
938 ; CHECK-NEXT: vmov.u16 r2, q5[7]
939 ; CHECK-NEXT: vmov.u16 r3, q5[5]
940 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
941 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
942 ; CHECK-NEXT: vpsel q5, q2, q1
943 ; CHECK-NEXT: vmov r2, r3, d10
944 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
945 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
946 ; CHECK-NEXT: vmov.s8 r2, q0[5]
947 ; CHECK-NEXT: vmov.s8 r3, q0[4]
948 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
949 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
950 ; CHECK-NEXT: asrs r2, r2, #31
951 ; CHECK-NEXT: asrs r3, r3, #31
952 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
953 ; CHECK-NEXT: vpsel q6, q6, q3
954 ; CHECK-NEXT: vmov r2, r3, d12
955 ; CHECK-NEXT: adds r0, r0, r2
956 ; CHECK-NEXT: adcs r1, r3
957 ; CHECK-NEXT: vmov r2, r3, d13
958 ; CHECK-NEXT: adds r0, r0, r2
959 ; CHECK-NEXT: adcs r1, r3
960 ; CHECK-NEXT: vmov r2, r3, d11
961 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
962 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
963 ; CHECK-NEXT: vmov.s8 r2, q0[7]
964 ; CHECK-NEXT: vmov.s8 r3, q0[6]
965 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
966 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
967 ; CHECK-NEXT: asrs r2, r2, #31
968 ; CHECK-NEXT: asrs r3, r3, #31
969 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
970 ; CHECK-NEXT: vpsel q5, q5, q3
971 ; CHECK-NEXT: vmov r2, r3, d10
972 ; CHECK-NEXT: adds r0, r0, r2
973 ; CHECK-NEXT: adcs r1, r3
974 ; CHECK-NEXT: vmov r2, r3, d11
975 ; CHECK-NEXT: adds r0, r0, r2
976 ; CHECK-NEXT: vmov.u8 r2, q4[8]
977 ; CHECK-NEXT: vmov.16 q5[0], r2
978 ; CHECK-NEXT: vmov.u8 r2, q4[9]
979 ; CHECK-NEXT: vmov.16 q5[1], r2
980 ; CHECK-NEXT: vmov.u8 r2, q4[10]
981 ; CHECK-NEXT: vmov.16 q5[2], r2
982 ; CHECK-NEXT: vmov.u8 r2, q4[11]
983 ; CHECK-NEXT: vmov.16 q5[3], r2
984 ; CHECK-NEXT: vmov.u8 r2, q4[12]
985 ; CHECK-NEXT: vmov.16 q5[4], r2
986 ; CHECK-NEXT: vmov.u8 r2, q4[13]
987 ; CHECK-NEXT: vmov.16 q5[5], r2
988 ; CHECK-NEXT: vmov.u8 r2, q4[14]
989 ; CHECK-NEXT: vmov.16 q5[6], r2
990 ; CHECK-NEXT: vmov.u8 r2, q4[15]
991 ; CHECK-NEXT: vmov.16 q5[7], r2
992 ; CHECK-NEXT: adcs r1, r3
993 ; CHECK-NEXT: vcmp.i16 ne, q5, zr
994 ; CHECK-NEXT: vpsel q4, q2, q1
995 ; CHECK-NEXT: vmov.u16 r2, q4[2]
996 ; CHECK-NEXT: vmov.u16 r3, q4[0]
997 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
998 ; CHECK-NEXT: vmov.u16 r2, q4[3]
999 ; CHECK-NEXT: vmov.u16 r3, q4[1]
1000 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1001 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1002 ; CHECK-NEXT: vpsel q5, q2, q1
1003 ; CHECK-NEXT: vmov r2, r3, d10
1004 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
1005 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
1006 ; CHECK-NEXT: vmov.s8 r2, q0[9]
1007 ; CHECK-NEXT: vmov.s8 r3, q0[8]
1008 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
1009 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1010 ; CHECK-NEXT: asrs r2, r2, #31
1011 ; CHECK-NEXT: asrs r3, r3, #31
1012 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
1013 ; CHECK-NEXT: vpsel q6, q6, q3
1014 ; CHECK-NEXT: vmov r2, r3, d12
1015 ; CHECK-NEXT: adds r0, r0, r2
1016 ; CHECK-NEXT: adcs r1, r3
1017 ; CHECK-NEXT: vmov r2, r3, d13
1018 ; CHECK-NEXT: adds r0, r0, r2
1019 ; CHECK-NEXT: adcs r1, r3
1020 ; CHECK-NEXT: vmov r2, r3, d11
1021 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
1022 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
1023 ; CHECK-NEXT: vmov.s8 r2, q0[11]
1024 ; CHECK-NEXT: vmov.s8 r3, q0[10]
1025 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1026 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1027 ; CHECK-NEXT: asrs r2, r2, #31
1028 ; CHECK-NEXT: asrs r3, r3, #31
1029 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1030 ; CHECK-NEXT: vpsel q5, q5, q3
1031 ; CHECK-NEXT: vmov r2, r3, d10
1032 ; CHECK-NEXT: adds r0, r0, r2
1033 ; CHECK-NEXT: adcs r1, r3
1034 ; CHECK-NEXT: vmov r2, r3, d11
1035 ; CHECK-NEXT: adds r0, r0, r2
1036 ; CHECK-NEXT: vmov.u16 r2, q4[6]
1037 ; CHECK-NEXT: adcs r1, r3
1038 ; CHECK-NEXT: vmov.u16 r3, q4[4]
1039 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1040 ; CHECK-NEXT: vmov.u16 r2, q4[7]
1041 ; CHECK-NEXT: vmov.u16 r3, q4[5]
1042 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1043 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1044 ; CHECK-NEXT: vpsel q1, q2, q1
1045 ; CHECK-NEXT: vmov r2, r3, d2
1046 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1047 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
1048 ; CHECK-NEXT: vmov.s8 r2, q0[13]
1049 ; CHECK-NEXT: vmov.s8 r3, q0[12]
1050 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1051 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1052 ; CHECK-NEXT: asrs r2, r2, #31
1053 ; CHECK-NEXT: asrs r3, r3, #31
1054 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
1055 ; CHECK-NEXT: vpsel q2, q2, q3
1056 ; CHECK-NEXT: vmov r2, r3, d4
1057 ; CHECK-NEXT: adds r0, r0, r2
1058 ; CHECK-NEXT: adcs r1, r3
1059 ; CHECK-NEXT: vmov r2, r3, d5
1060 ; CHECK-NEXT: adds r0, r0, r2
1061 ; CHECK-NEXT: adcs r1, r3
1062 ; CHECK-NEXT: vmov r2, r3, d3
1063 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
1064 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
1065 ; CHECK-NEXT: vmov.s8 r2, q0[15]
1066 ; CHECK-NEXT: vmov.s8 r3, q0[14]
1067 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1068 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1069 ; CHECK-NEXT: asrs r2, r2, #31
1070 ; CHECK-NEXT: asrs r3, r3, #31
1071 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1072 ; CHECK-NEXT: vpsel q0, q0, q3
1073 ; CHECK-NEXT: vmov r2, r3, d0
1074 ; CHECK-NEXT: adds r0, r0, r2
1075 ; CHECK-NEXT: adcs r1, r3
1076 ; CHECK-NEXT: vmov r2, r3, d1
1077 ; CHECK-NEXT: adds r0, r0, r2
1078 ; CHECK-NEXT: adcs r1, r3
1079 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1082 %c = icmp eq <16 x i8> %b, zeroinitializer
1083 %xx = sext <16 x i8> %x to <16 x i64>
1084 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
1085 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction, <8 x i8> zero-extended to <8 x i64>.
; A lane of %x contributes to the i64 sum only where the matching lane of %b
; is zero; all other lanes contribute 0 (select against zeroinitializer).
; The expected assembly does not use a single predicated reduction here: the
; mask is materialised with vpsel (0xff / 0x00 lanes), re-compared at i32
; width, and the selected 64-bit lane pairs are accumulated into r0:r1 with
; adds/adcs. The very first pair is combined with add/orrs instead.
; NOTE(review): add/orrs is presumably valid because the zero-extended
; operands have zero high words - confirm against the backend.
1089 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %b) {
1090 ; CHECK-LABEL: add_v8i8_v8i64_zext:
1091 ; CHECK: @ %bb.0: @ %entry
1092 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
1093 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
1094 ; CHECK-NEXT: vmovlb.u8 q1, q1
1095 ; CHECK-NEXT: vmov.i8 q3, #0x0
1096 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1097 ; CHECK-NEXT: vmov.i8 q4, #0xff
1098 ; CHECK-NEXT: vpsel q5, q4, q3
1099 ; CHECK-NEXT: vmovlb.u8 q0, q0
1100 ; CHECK-NEXT: vmov.u16 r0, q5[2]
1101 ; CHECK-NEXT: vmov.u16 r1, q5[0]
1102 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1103 ; CHECK-NEXT: vmov.u16 r0, q5[3]
1104 ; CHECK-NEXT: vmov.u16 r1, q5[1]
1105 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1106 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1107 ; CHECK-NEXT: vpsel q6, q4, q3
1108 ; CHECK-NEXT: vmov r0, r1, d12
1109 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
1110 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
1111 ; CHECK-NEXT: vmov.u16 r0, q0[1]
1112 ; CHECK-NEXT: vmov.u16 r1, q0[0]
1113 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1114 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
1115 ; CHECK-NEXT: vmov.i64 q1, #0xffff
1116 ; CHECK-NEXT: vand q7, q2, q1
1117 ; CHECK-NEXT: vmov.i32 q2, #0x0
1118 ; CHECK-NEXT: vpsel q7, q7, q2
1119 ; CHECK-NEXT: vmov r0, r1, d15
1120 ; CHECK-NEXT: vmov r2, r3, d14
1121 ; CHECK-NEXT: orrs r1, r3
1122 ; CHECK-NEXT: add r0, r2
1123 ; CHECK-NEXT: vmov r2, r3, d13
1124 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
1125 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
1126 ; CHECK-NEXT: vmov.u16 r2, q0[3]
1127 ; CHECK-NEXT: vmov.u16 r3, q0[2]
1128 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
1129 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1130 ; CHECK-NEXT: vand q6, q6, q1
1131 ; CHECK-NEXT: vpsel q6, q6, q2
1132 ; CHECK-NEXT: vmov r2, r3, d12
1133 ; CHECK-NEXT: adds r0, r0, r2
1134 ; CHECK-NEXT: adcs r1, r3
1135 ; CHECK-NEXT: vmov r2, r3, d13
1136 ; CHECK-NEXT: adds r0, r0, r2
1137 ; CHECK-NEXT: vmov.u16 r2, q5[6]
1138 ; CHECK-NEXT: adcs r1, r3
1139 ; CHECK-NEXT: vmov.u16 r3, q5[4]
1140 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1141 ; CHECK-NEXT: vmov.u16 r2, q5[7]
1142 ; CHECK-NEXT: vmov.u16 r3, q5[5]
1143 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
1144 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
1145 ; CHECK-NEXT: vpsel q3, q4, q3
1146 ; CHECK-NEXT: vmov r2, r3, d6
1147 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
1148 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r3
1149 ; CHECK-NEXT: vmov.u16 r2, q0[5]
1150 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1151 ; CHECK-NEXT: vcmp.i32 ne, q4, zr
1152 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
1153 ; CHECK-NEXT: vand q4, q4, q1
1154 ; CHECK-NEXT: vpsel q4, q4, q2
1155 ; CHECK-NEXT: vmov r2, r3, d8
1156 ; CHECK-NEXT: adds r0, r0, r2
1157 ; CHECK-NEXT: adcs r1, r3
1158 ; CHECK-NEXT: vmov r2, r3, d9
1159 ; CHECK-NEXT: adds r0, r0, r2
1160 ; CHECK-NEXT: adcs r1, r3
1161 ; CHECK-NEXT: vmov r2, r3, d7
1162 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1163 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
1164 ; CHECK-NEXT: vmov.u16 r2, q0[7]
1165 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1166 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1167 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1168 ; CHECK-NEXT: vand q0, q0, q1
1169 ; CHECK-NEXT: vpsel q0, q0, q2
1170 ; CHECK-NEXT: vmov r2, r3, d0
1171 ; CHECK-NEXT: adds r0, r0, r2
1172 ; CHECK-NEXT: adcs r1, r3
1173 ; CHECK-NEXT: vmov r2, r3, d1
1174 ; CHECK-NEXT: adds r0, r0, r2
1175 ; CHECK-NEXT: adcs r1, r3
1176 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1179 %c = icmp eq <8 x i8> %b, zeroinitializer
1180 %xx = zext <8 x i8> %x to <8 x i64>
1181 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1182 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction, <8 x i8> sign-extended to <8 x i64>.
; A lane of %x contributes to the i64 sum only where the matching lane of %b
; is zero; all other lanes contribute 0.
; In the expected assembly each data lane is extracted with vmov.u16,
; sign-extended from 8 bits with sxtb, and its high word is produced with
; asrs #31. The masked lane pairs are accumulated into r0:r1 with adds/adcs.
1186 define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %b) {
1187 ; CHECK-LABEL: add_v8i8_v8i64_sext:
1188 ; CHECK: @ %bb.0: @ %entry
1189 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
1190 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
1191 ; CHECK-NEXT: vmovlb.u8 q1, q1
1192 ; CHECK-NEXT: vmov.i8 q2, #0x0
1193 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1194 ; CHECK-NEXT: vmov.i8 q3, #0xff
1195 ; CHECK-NEXT: vpsel q4, q3, q2
1196 ; CHECK-NEXT: vmov.u16 r0, q4[2]
1197 ; CHECK-NEXT: vmov.u16 r1, q4[0]
1198 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
1199 ; CHECK-NEXT: vmov.u16 r0, q4[3]
1200 ; CHECK-NEXT: vmov.u16 r1, q4[1]
1201 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
1202 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1203 ; CHECK-NEXT: vpsel q5, q3, q2
1204 ; CHECK-NEXT: vmov r0, r1, d10
1205 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
1206 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
1207 ; CHECK-NEXT: vmov.u16 r0, q0[1]
1208 ; CHECK-NEXT: vmov.u16 r1, q0[0]
1209 ; CHECK-NEXT: sxtb r0, r0
1210 ; CHECK-NEXT: sxtb r1, r1
1211 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1212 ; CHECK-NEXT: vmov q6[2], q6[0], r1, r0
1213 ; CHECK-NEXT: asrs r0, r0, #31
1214 ; CHECK-NEXT: asrs r1, r1, #31
1215 ; CHECK-NEXT: vmov.i32 q1, #0x0
1216 ; CHECK-NEXT: vmov q6[3], q6[1], r1, r0
1217 ; CHECK-NEXT: vpsel q6, q6, q1
1218 ; CHECK-NEXT: vmov r0, r1, d13
1219 ; CHECK-NEXT: vmov r2, r3, d12
1220 ; CHECK-NEXT: adds r0, r0, r2
1221 ; CHECK-NEXT: adcs r1, r3
1222 ; CHECK-NEXT: vmov r2, r3, d11
1223 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
1224 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
1225 ; CHECK-NEXT: vmov.u16 r2, q0[3]
1226 ; CHECK-NEXT: vmov.u16 r3, q0[2]
1227 ; CHECK-NEXT: sxtb r2, r2
1228 ; CHECK-NEXT: sxtb r3, r3
1229 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1230 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1231 ; CHECK-NEXT: asrs r2, r2, #31
1232 ; CHECK-NEXT: asrs r3, r3, #31
1233 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1234 ; CHECK-NEXT: vpsel q5, q5, q1
1235 ; CHECK-NEXT: vmov r2, r3, d10
1236 ; CHECK-NEXT: adds r0, r0, r2
1237 ; CHECK-NEXT: adcs r1, r3
1238 ; CHECK-NEXT: vmov r2, r3, d11
1239 ; CHECK-NEXT: adds r0, r0, r2
1240 ; CHECK-NEXT: vmov.u16 r2, q4[6]
1241 ; CHECK-NEXT: adcs r1, r3
1242 ; CHECK-NEXT: vmov.u16 r3, q4[4]
1243 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1244 ; CHECK-NEXT: vmov.u16 r2, q4[7]
1245 ; CHECK-NEXT: vmov.u16 r3, q4[5]
1246 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1247 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1248 ; CHECK-NEXT: vpsel q2, q3, q2
1249 ; CHECK-NEXT: vmov r2, r3, d4
1250 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1251 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
1252 ; CHECK-NEXT: vmov.u16 r2, q0[5]
1253 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1254 ; CHECK-NEXT: sxtb r2, r2
1255 ; CHECK-NEXT: sxtb r3, r3
1256 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1257 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
1258 ; CHECK-NEXT: asrs r2, r2, #31
1259 ; CHECK-NEXT: asrs r3, r3, #31
1260 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
1261 ; CHECK-NEXT: vpsel q3, q3, q1
1262 ; CHECK-NEXT: vmov r2, r3, d6
1263 ; CHECK-NEXT: adds r0, r0, r2
1264 ; CHECK-NEXT: adcs r1, r3
1265 ; CHECK-NEXT: vmov r2, r3, d7
1266 ; CHECK-NEXT: adds r0, r0, r2
1267 ; CHECK-NEXT: adcs r1, r3
1268 ; CHECK-NEXT: vmov r2, r3, d5
1269 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1270 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
1271 ; CHECK-NEXT: vmov.u16 r2, q0[7]
1272 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1273 ; CHECK-NEXT: sxtb r2, r2
1274 ; CHECK-NEXT: sxtb r3, r3
1275 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1276 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1277 ; CHECK-NEXT: asrs r2, r2, #31
1278 ; CHECK-NEXT: asrs r3, r3, #31
1279 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1280 ; CHECK-NEXT: vpsel q0, q0, q1
1281 ; CHECK-NEXT: vmov r2, r3, d0
1282 ; CHECK-NEXT: adds r0, r0, r2
1283 ; CHECK-NEXT: adcs r1, r3
1284 ; CHECK-NEXT: vmov r2, r3, d1
1285 ; CHECK-NEXT: adds r0, r0, r2
1286 ; CHECK-NEXT: adcs r1, r3
1287 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
1290 %c = icmp eq <8 x i8> %b, zeroinitializer
1291 %xx = sext <8 x i8> %x to <8 x i64>
1292 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1293 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction, <4 x i8> zero-extended to <4 x i64>.
; Lanes of %x are summed where the matching lane of %b is zero.
; In the expected assembly both operands are masked with 0xff per 32-bit lane
; (vand), then a vpt-predicated vaddlvt.u32 writes the result to r0/r1.
1297 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %b) {
1298 ; CHECK-LABEL: add_v4i8_v4i64_zext:
1299 ; CHECK: @ %bb.0: @ %entry
1300 ; CHECK-NEXT: vmov.i32 q2, #0xff
1301 ; CHECK-NEXT: vand q1, q1, q2
1302 ; CHECK-NEXT: vand q0, q0, q2
1303 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1304 ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
1307 %c = icmp eq <4 x i8> %b, zeroinitializer
1308 %xx = zext <4 x i8> %x to <4 x i64>
1309 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1310 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction, <4 x i8> sign-extended to <4 x i64>.
; Lanes of %x are summed where the matching lane of %b is zero.
; In the expected assembly %x is widened with vmovlb.s8 then vmovlb.s16, %b
; is masked with 0xff for the compare, and a vpt-predicated vaddlvt.s32
; writes the result to r0/r1.
1314 define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %b) {
1315 ; CHECK-LABEL: add_v4i8_v4i64_sext:
1316 ; CHECK: @ %bb.0: @ %entry
1317 ; CHECK-NEXT: vmov.i32 q2, #0xff
1318 ; CHECK-NEXT: vmovlb.s8 q0, q0
1319 ; CHECK-NEXT: vand q1, q1, q2
1320 ; CHECK-NEXT: vmovlb.s16 q0, q0
1321 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1322 ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
1325 %c = icmp eq <4 x i8> %b, zeroinitializer
1326 %xx = sext <4 x i8> %x to <4 x i64>
1327 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1328 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction, <2 x i8> zero-extended to <2 x i64>.
; Lanes of %x are summed where the matching lane of %b is zero.
; In the expected assembly the two-lane predicate is built in r1 with
; cmp/csetm/bfi (bits 0-7 and 8-15) and moved into p0 with vmsr. vpsel then
; selects the data and the two 64-bit lanes are combined with add/orrs.
1332 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
1333 ; CHECK-LABEL: add_v2i8_v2i64_zext:
1334 ; CHECK: @ %bb.0: @ %entry
1335 ; CHECK-NEXT: vmov.i64 q2, #0xff
1336 ; CHECK-NEXT: movs r1, #0
1337 ; CHECK-NEXT: vand q1, q1, q2
1338 ; CHECK-NEXT: vand q0, q0, q2
1339 ; CHECK-NEXT: vmov r0, s4
1340 ; CHECK-NEXT: cmp r0, #0
1341 ; CHECK-NEXT: csetm r0, eq
1342 ; CHECK-NEXT: bfi r1, r0, #0, #8
1343 ; CHECK-NEXT: vmov r0, s6
1344 ; CHECK-NEXT: vmov.i32 q1, #0x0
1345 ; CHECK-NEXT: cmp r0, #0
1346 ; CHECK-NEXT: csetm r0, eq
1347 ; CHECK-NEXT: bfi r1, r0, #8, #8
1348 ; CHECK-NEXT: vmsr p0, r1
1349 ; CHECK-NEXT: vpsel q0, q0, q1
1350 ; CHECK-NEXT: vmov r0, r1, d1
1351 ; CHECK-NEXT: vmov r2, r3, d0
1352 ; CHECK-NEXT: add r0, r2
1353 ; CHECK-NEXT: orrs r1, r3
1356 %c = icmp eq <2 x i8> %b, zeroinitializer
1357 %xx = zext <2 x i8> %x to <2 x i64>
1358 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1359 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <2 x i8> sign-extended to <2 x i64>.
; Lanes of %x are summed where the matching lane of %b is zero.
; In the expected assembly the predicate is built in r1 with cmp/csetm/bfi
; and written to p0. The data lanes are sign-extended in scalar registers
; (sxtb for the low word, asrs #31 for the high word), reassembled into q0,
; selected with vpsel and summed with adds/adcs.
1363 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %b) {
1364 ; CHECK-LABEL: add_v2i8_v2i64_sext:
1365 ; CHECK: @ %bb.0: @ %entry
1366 ; CHECK-NEXT: vmov.i32 q2, #0xff
1367 ; CHECK-NEXT: movs r1, #0
1368 ; CHECK-NEXT: vand q1, q1, q2
1369 ; CHECK-NEXT: vmov r0, s4
1370 ; CHECK-NEXT: cmp r0, #0
1371 ; CHECK-NEXT: csetm r0, eq
1372 ; CHECK-NEXT: bfi r1, r0, #0, #8
1373 ; CHECK-NEXT: vmov r0, s6
1374 ; CHECK-NEXT: vmov.i32 q1, #0x0
1375 ; CHECK-NEXT: cmp r0, #0
1376 ; CHECK-NEXT: csetm r0, eq
1377 ; CHECK-NEXT: bfi r1, r0, #8, #8
1378 ; CHECK-NEXT: vmov r0, s2
1379 ; CHECK-NEXT: vmsr p0, r1
1380 ; CHECK-NEXT: vmov r1, s0
1381 ; CHECK-NEXT: sxtb r0, r0
1382 ; CHECK-NEXT: sxtb r1, r1
1383 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
1384 ; CHECK-NEXT: asrs r0, r0, #31
1385 ; CHECK-NEXT: asrs r1, r1, #31
1386 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
1387 ; CHECK-NEXT: vpsel q0, q0, q1
1388 ; CHECK-NEXT: vmov r0, r1, d1
1389 ; CHECK-NEXT: vmov r2, r3, d0
1390 ; CHECK-NEXT: adds r0, r0, r2
1391 ; CHECK-NEXT: adcs r1, r3
1394 %c = icmp eq <2 x i8> %b, zeroinitializer
1395 %xx = sext <2 x i8> %x to <2 x i64>
1396 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1397 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction over native <2 x i64> lanes (no extension).
; Lanes of %x are summed where the matching i64 lane of %b is zero.
; In the expected assembly each 64-bit lane of %b is tested by OR-ing its two
; halves (orrs) and turning the eq result into a mask with csetm. The mask
; bytes are inserted into r1 with bfi, written to p0, and the vpsel result is
; summed with adds/adcs.
1401 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %b) {
1402 ; CHECK-LABEL: add_v2i64_v2i64:
1403 ; CHECK: @ %bb.0: @ %entry
1404 ; CHECK-NEXT: vmov r0, r1, d2
1405 ; CHECK-NEXT: orrs r0, r1
1406 ; CHECK-NEXT: mov.w r1, #0
1407 ; CHECK-NEXT: csetm r0, eq
1408 ; CHECK-NEXT: bfi r1, r0, #0, #8
1409 ; CHECK-NEXT: vmov r0, r2, d3
1410 ; CHECK-NEXT: vmov.i32 q1, #0x0
1411 ; CHECK-NEXT: orrs r0, r2
1412 ; CHECK-NEXT: csetm r0, eq
1413 ; CHECK-NEXT: bfi r1, r0, #8, #8
1414 ; CHECK-NEXT: vmsr p0, r1
1415 ; CHECK-NEXT: vpsel q0, q0, q1
1416 ; CHECK-NEXT: vmov r0, r1, d1
1417 ; CHECK-NEXT: vmov r2, r3, d0
1418 ; CHECK-NEXT: adds r0, r0, r2
1419 ; CHECK-NEXT: adcs r1, r3
1422 %c = icmp eq <2 x i64> %b, zeroinitializer
1423 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
1424 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction of <4 x i32> with an extra scalar argument %a.
; Lanes of %x are summed where the matching lane of %b is zero.
; NOTE(review): the instruction that consumes %a is not visible in this
; excerpt; the expected vaddvat.u32 on r0 suggests %a is added to the reduced
; value - confirm against the full function body.
1428 define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %b, i32 %a) {
1429 ; CHECK-LABEL: add_v4i32_v4i32_acc:
1430 ; CHECK: @ %bb.0: @ %entry
1431 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1432 ; CHECK-NEXT: vaddvat.u32 r0, q0
1435 %c = icmp eq <4 x i32> %b, zeroinitializer
1436 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1437 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction, <4 x i32> zero-extended to <4 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly is a vpt-predicated vaddlvat.u32 on r0/r1.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1442 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1443 ; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
1444 ; CHECK: @ %bb.0: @ %entry
1445 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1446 ; CHECK-NEXT: vaddlvat.u32 r0, r1, q0
1449 %c = icmp eq <4 x i32> %b, zeroinitializer
1450 %xx = zext <4 x i32> %x to <4 x i64>
1451 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1452 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction, <4 x i32> sign-extended to <4 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly is a vpt-predicated vaddlvat.s32 on r0/r1.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1457 define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1458 ; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
1459 ; CHECK: @ %bb.0: @ %entry
1460 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1461 ; CHECK-NEXT: vaddlvat.s32 r0, r1, q0
1464 %c = icmp eq <4 x i32> %b, zeroinitializer
1465 %xx = sext <4 x i32> %x to <4 x i64>
1466 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1467 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
; Predicated add reduction, <2 x i32> zero-extended to <2 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero.
; The expected assembly builds the predicate in r3 (cmp/csetm/bfi, vmsr p0),
; selects with vpsel, sums the two lanes using lr/r12 as scratch (hence the
; push/pop of lr) and finally adds that sum into r0:r1 with adds/adcs.
; NOTE(review): r0:r1 presumably holds %a on entry; the IR instruction that
; consumes %a is not visible in this excerpt - confirm.
1472 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1473 ; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
1474 ; CHECK: @ %bb.0: @ %entry
1475 ; CHECK-NEXT: .save {r7, lr}
1476 ; CHECK-NEXT: push {r7, lr}
1477 ; CHECK-NEXT: vmov r2, s4
1478 ; CHECK-NEXT: movs r3, #0
1479 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
1480 ; CHECK-NEXT: vand q0, q0, q2
1481 ; CHECK-NEXT: cmp r2, #0
1482 ; CHECK-NEXT: csetm r2, eq
1483 ; CHECK-NEXT: bfi r3, r2, #0, #8
1484 ; CHECK-NEXT: vmov r2, s6
1485 ; CHECK-NEXT: vmov.i32 q1, #0x0
1486 ; CHECK-NEXT: cmp r2, #0
1487 ; CHECK-NEXT: csetm r2, eq
1488 ; CHECK-NEXT: bfi r3, r2, #8, #8
1489 ; CHECK-NEXT: vmsr p0, r3
1490 ; CHECK-NEXT: vpsel q0, q0, q1
1491 ; CHECK-NEXT: vmov lr, r12, d1
1492 ; CHECK-NEXT: vmov r3, r2, d0
1493 ; CHECK-NEXT: adds.w r3, r3, lr
1494 ; CHECK-NEXT: adc.w r2, r2, r12
1495 ; CHECK-NEXT: adds r0, r0, r3
1496 ; CHECK-NEXT: adcs r1, r2
1497 ; CHECK-NEXT: pop {r7, pc}
1499 %c = icmp eq <2 x i32> %b, zeroinitializer
1500 %xx = zext <2 x i32> %x to <2 x i64>
1501 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1502 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <2 x i32> sign-extended to <2 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero.
; The expected assembly sign-extends the two data lanes in scalar registers
; (asrs #31 produces the high words), builds the predicate in r3, selects
; with vpsel, sums the lanes via lr/r12 and adds that sum into r0:r1.
; NOTE(review): r0:r1 presumably holds %a on entry; the IR instruction that
; consumes %a is not visible in this excerpt - confirm.
1507 define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1508 ; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
1509 ; CHECK: @ %bb.0: @ %entry
1510 ; CHECK-NEXT: .save {r7, lr}
1511 ; CHECK-NEXT: push {r7, lr}
1512 ; CHECK-NEXT: vmov r2, s2
1513 ; CHECK-NEXT: vmov r3, s0
1514 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1515 ; CHECK-NEXT: asrs r2, r2, #31
1516 ; CHECK-NEXT: asrs r3, r3, #31
1517 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1518 ; CHECK-NEXT: vmov r2, s4
1519 ; CHECK-NEXT: movs r3, #0
1520 ; CHECK-NEXT: cmp r2, #0
1521 ; CHECK-NEXT: csetm r2, eq
1522 ; CHECK-NEXT: bfi r3, r2, #0, #8
1523 ; CHECK-NEXT: vmov r2, s6
1524 ; CHECK-NEXT: vmov.i32 q1, #0x0
1525 ; CHECK-NEXT: cmp r2, #0
1526 ; CHECK-NEXT: csetm r2, eq
1527 ; CHECK-NEXT: bfi r3, r2, #8, #8
1528 ; CHECK-NEXT: vmsr p0, r3
1529 ; CHECK-NEXT: vpsel q0, q0, q1
1530 ; CHECK-NEXT: vmov lr, r12, d1
1531 ; CHECK-NEXT: vmov r3, r2, d0
1532 ; CHECK-NEXT: adds.w r3, r3, lr
1533 ; CHECK-NEXT: adc.w r2, r2, r12
1534 ; CHECK-NEXT: adds r0, r0, r3
1535 ; CHECK-NEXT: adcs r1, r2
1536 ; CHECK-NEXT: pop {r7, pc}
1538 %c = icmp eq <2 x i32> %b, zeroinitializer
1539 %xx = sext <2 x i32> %x to <2 x i64>
1540 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1541 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <8 x i16> zero-extended to <8 x i32>, with an
; extra i32 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly is a vpt-predicated vaddvat.u16 on r0, with no separate widening.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1546 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1547 ; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
1548 ; CHECK: @ %bb.0: @ %entry
1549 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1550 ; CHECK-NEXT: vaddvat.u16 r0, q0
1553 %c = icmp eq <8 x i16> %b, zeroinitializer
1554 %xx = zext <8 x i16> %x to <8 x i32>
1555 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1556 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Predicated add reduction, <8 x i16> sign-extended to <8 x i32>, with an
; extra i32 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly is a vpt-predicated vaddvat.s16 on r0, with no separate widening.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1561 define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1562 ; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
1563 ; CHECK: @ %bb.0: @ %entry
1564 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1565 ; CHECK-NEXT: vaddvat.s16 r0, q0
1568 %c = icmp eq <8 x i16> %b, zeroinitializer
1569 %xx = sext <8 x i16> %x to <8 x i32>
1570 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1571 %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
; Predicated add reduction, <4 x i16> zero-extended to <4 x i32>, with an
; extra i32 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly widens both operands with vmovlb.u16 and then uses a
; vpt-predicated vaddvat.u32 on r0.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1576 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1577 ; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
1578 ; CHECK: @ %bb.0: @ %entry
1579 ; CHECK-NEXT: vmovlb.u16 q1, q1
1580 ; CHECK-NEXT: vmovlb.u16 q0, q0
1581 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1582 ; CHECK-NEXT: vaddvat.u32 r0, q0
1585 %c = icmp eq <4 x i16> %b, zeroinitializer
1586 %xx = zext <4 x i16> %x to <4 x i32>
1587 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1588 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction, <4 x i16> sign-extended to <4 x i32>, with an
; extra i32 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly widens %b with vmovlb.u16 for the compare and %x with vmovlb.s16
; for the data, then uses a vpt-predicated vaddvat.u32 on r0.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1593 define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1594 ; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
1595 ; CHECK: @ %bb.0: @ %entry
1596 ; CHECK-NEXT: vmovlb.u16 q1, q1
1597 ; CHECK-NEXT: vmovlb.s16 q0, q0
1598 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1599 ; CHECK-NEXT: vaddvat.u32 r0, q0
1602 %c = icmp eq <4 x i16> %b, zeroinitializer
1603 %xx = sext <4 x i16> %x to <4 x i32>
1604 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1605 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction of <8 x i16> returning a zeroext i16, with an
; extra i16 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero. The expected
; assembly is a vpt-predicated vaddvat.u16 on r0 followed by uxth, which
; matches the zeroext return attribute.
; NOTE(review): the use of %a is not visible in this excerpt; presumably it
; is added to the reduced value - confirm.
1610 define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> %b, i16 %a) {
1611 ; CHECK-LABEL: add_v8i16_v8i16_acc:
1612 ; CHECK: @ %bb.0: @ %entry
1613 ; CHECK-NEXT: vpt.i16 eq, q1, zr
1614 ; CHECK-NEXT: vaddvat.u16 r0, q0
1615 ; CHECK-NEXT: uxth r0, r0
1618 %c = icmp eq <8 x i16> %b, zeroinitializer
1619 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1620 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated add reduction, <8 x i16> zero-extended to <8 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero.
; The expected assembly expands the mask with vpsel (0xff / 0x00 lanes)
; re-compared at i32 width, masks the data lanes with 0xffff (vand), and
; accumulates the partial sums in r12:lr. The total is then added into r0:r1
; by the final adds/adcs pair.
; NOTE(review): r0:r1 presumably holds %a on entry; the IR instruction that
; consumes %a is not visible in this excerpt - confirm.
1625 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1626 ; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
1627 ; CHECK: @ %bb.0: @ %entry
1628 ; CHECK-NEXT: .save {r7, lr}
1629 ; CHECK-NEXT: push {r7, lr}
1630 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
1631 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
1632 ; CHECK-NEXT: vmov.i8 q3, #0x0
1633 ; CHECK-NEXT: vmov.i8 q4, #0xff
1634 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1635 ; CHECK-NEXT: vpsel q5, q4, q3
1636 ; CHECK-NEXT: vmov.u16 r2, q5[2]
1637 ; CHECK-NEXT: vmov.u16 r3, q5[0]
1638 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1639 ; CHECK-NEXT: vmov.u16 r2, q5[3]
1640 ; CHECK-NEXT: vmov.u16 r3, q5[1]
1641 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1642 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1643 ; CHECK-NEXT: vpsel q6, q4, q3
1644 ; CHECK-NEXT: vmov r2, r3, d12
1645 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
1646 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
1647 ; CHECK-NEXT: vmov.u16 r2, q0[1]
1648 ; CHECK-NEXT: vmov.u16 r3, q0[0]
1649 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1650 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
1651 ; CHECK-NEXT: vmov.i64 q1, #0xffff
1652 ; CHECK-NEXT: vand q7, q2, q1
1653 ; CHECK-NEXT: vmov.i32 q2, #0x0
1654 ; CHECK-NEXT: vpsel q7, q7, q2
1655 ; CHECK-NEXT: vmov r12, lr, d15
1656 ; CHECK-NEXT: vmov r2, r3, d14
1657 ; CHECK-NEXT: orr.w lr, lr, r3
1658 ; CHECK-NEXT: add r12, r2
1659 ; CHECK-NEXT: vmov r3, r2, d13
1660 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1661 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
1662 ; CHECK-NEXT: vmov.u16 r2, q0[3]
1663 ; CHECK-NEXT: vmov.u16 r3, q0[2]
1664 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
1665 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1666 ; CHECK-NEXT: vand q6, q6, q1
1667 ; CHECK-NEXT: vpsel q6, q6, q2
1668 ; CHECK-NEXT: vmov r2, r3, d12
1669 ; CHECK-NEXT: adds.w r12, r12, r2
1670 ; CHECK-NEXT: adc.w lr, lr, r3
1671 ; CHECK-NEXT: vmov r2, r3, d13
1672 ; CHECK-NEXT: adds.w r12, r12, r2
1673 ; CHECK-NEXT: vmov.u16 r2, q5[6]
1674 ; CHECK-NEXT: adc.w lr, lr, r3
1675 ; CHECK-NEXT: vmov.u16 r3, q5[4]
1676 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1677 ; CHECK-NEXT: vmov.u16 r2, q5[7]
1678 ; CHECK-NEXT: vmov.u16 r3, q5[5]
1679 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
1680 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
1681 ; CHECK-NEXT: vpsel q3, q4, q3
1682 ; CHECK-NEXT: vmov r2, r3, d6
1683 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
1684 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r3
1685 ; CHECK-NEXT: vmov.u16 r2, q0[5]
1686 ; CHECK-NEXT: vmov.u16 r3, q0[4]
1687 ; CHECK-NEXT: vcmp.i32 ne, q4, zr
1688 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
1689 ; CHECK-NEXT: vand q4, q4, q1
1690 ; CHECK-NEXT: vpsel q4, q4, q2
1691 ; CHECK-NEXT: vmov r2, r3, d8
1692 ; CHECK-NEXT: adds.w r12, r12, r2
1693 ; CHECK-NEXT: adc.w lr, lr, r3
1694 ; CHECK-NEXT: vmov r2, r3, d9
1695 ; CHECK-NEXT: adds.w r12, r12, r2
1696 ; CHECK-NEXT: adc.w lr, lr, r3
1697 ; CHECK-NEXT: vmov r2, r3, d7
1698 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1699 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
1700 ; CHECK-NEXT: vmov.u16 r2, q0[7]
1701 ; CHECK-NEXT: vmov.u16 r3, q0[6]
1702 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1703 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1704 ; CHECK-NEXT: vand q0, q0, q1
1705 ; CHECK-NEXT: vpsel q0, q0, q2
1706 ; CHECK-NEXT: vmov r2, r3, d0
1707 ; CHECK-NEXT: adds.w r12, r12, r2
1708 ; CHECK-NEXT: adc.w lr, lr, r3
1709 ; CHECK-NEXT: vmov r2, r3, d1
1710 ; CHECK-NEXT: adds.w r2, r2, r12
1711 ; CHECK-NEXT: adc.w r3, r3, lr
1712 ; CHECK-NEXT: adds r0, r0, r2
1713 ; CHECK-NEXT: adcs r1, r3
1714 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1715 ; CHECK-NEXT: pop {r7, pc}
1717 %c = icmp eq <8 x i16> %b, zeroinitializer
1718 %xx = zext <8 x i16> %x to <8 x i64>
1719 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1720 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction, <8 x i16> sign-extended to <8 x i64>, with an
; extra i64 argument %a.
; Lanes of %x are summed where the matching lane of %b is zero.
; The expected assembly expands the mask with vpsel (0xff / 0x00 lanes)
; re-compared at i32 width, extracts the data lanes with vmov.s16, produces
; their high words with asrs #31, and accumulates the partial sums in lr:r12.
; The total is then added into r0:r1 by the final adds/adcs pair.
; NOTE(review): r0:r1 presumably holds %a on entry; the IR instruction that
; consumes %a is not visible in this excerpt - confirm.
1725 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1726 ; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
1727 ; CHECK: @ %bb.0: @ %entry
1728 ; CHECK-NEXT: .save {r7, lr}
1729 ; CHECK-NEXT: push {r7, lr}
1730 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
1731 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
1732 ; CHECK-NEXT: vmov.i8 q2, #0x0
1733 ; CHECK-NEXT: vmov.i8 q3, #0xff
1734 ; CHECK-NEXT: vcmp.i16 eq, q1, zr
1735 ; CHECK-NEXT: vpsel q4, q3, q2
1736 ; CHECK-NEXT: vmov.u16 r2, q4[2]
1737 ; CHECK-NEXT: vmov.u16 r3, q4[0]
1738 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
1739 ; CHECK-NEXT: vmov.u16 r2, q4[3]
1740 ; CHECK-NEXT: vmov.u16 r3, q4[1]
1741 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
1742 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1743 ; CHECK-NEXT: vpsel q5, q3, q2
1744 ; CHECK-NEXT: vmov r2, r3, d10
1745 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
1746 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
1747 ; CHECK-NEXT: vmov.s16 r2, q0[1]
1748 ; CHECK-NEXT: vmov.s16 r3, q0[0]
1749 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
1750 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
1751 ; CHECK-NEXT: asrs r2, r2, #31
1752 ; CHECK-NEXT: asrs r3, r3, #31
1753 ; CHECK-NEXT: vmov.i32 q1, #0x0
1754 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
1755 ; CHECK-NEXT: vpsel q6, q6, q1
1756 ; CHECK-NEXT: vmov lr, r12, d13
1757 ; CHECK-NEXT: vmov r3, r2, d12
1758 ; CHECK-NEXT: adds.w lr, lr, r3
1759 ; CHECK-NEXT: adc.w r12, r12, r2
1760 ; CHECK-NEXT: vmov r2, r3, d11
1761 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
1762 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
1763 ; CHECK-NEXT: vmov.s16 r2, q0[3]
1764 ; CHECK-NEXT: vmov.s16 r3, q0[2]
1765 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1766 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1767 ; CHECK-NEXT: asrs r2, r2, #31
1768 ; CHECK-NEXT: asrs r3, r3, #31
1769 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1770 ; CHECK-NEXT: vpsel q5, q5, q1
1771 ; CHECK-NEXT: vmov r2, r3, d10
1772 ; CHECK-NEXT: adds.w lr, lr, r2
1773 ; CHECK-NEXT: adc.w r12, r12, r3
1774 ; CHECK-NEXT: vmov r2, r3, d11
1775 ; CHECK-NEXT: adds.w lr, lr, r2
1776 ; CHECK-NEXT: vmov.u16 r2, q4[6]
1777 ; CHECK-NEXT: adc.w r12, r12, r3
1778 ; CHECK-NEXT: vmov.u16 r3, q4[4]
1779 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
1780 ; CHECK-NEXT: vmov.u16 r2, q4[7]
1781 ; CHECK-NEXT: vmov.u16 r3, q4[5]
1782 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
1783 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
1784 ; CHECK-NEXT: vpsel q2, q3, q2
1785 ; CHECK-NEXT: vmov r2, r3, d4
1786 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
1787 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
1788 ; CHECK-NEXT: vmov.s16 r2, q0[5]
1789 ; CHECK-NEXT: vmov.s16 r3, q0[4]
1790 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
1791 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
1792 ; CHECK-NEXT: asrs r2, r2, #31
1793 ; CHECK-NEXT: asrs r3, r3, #31
1794 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
1795 ; CHECK-NEXT: vpsel q3, q3, q1
1796 ; CHECK-NEXT: vmov r2, r3, d6
1797 ; CHECK-NEXT: adds.w lr, lr, r2
1798 ; CHECK-NEXT: adc.w r12, r12, r3
1799 ; CHECK-NEXT: vmov r2, r3, d7
1800 ; CHECK-NEXT: adds.w lr, lr, r2
1801 ; CHECK-NEXT: adc.w r12, r12, r3
1802 ; CHECK-NEXT: vmov r2, r3, d5
1803 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
1804 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
1805 ; CHECK-NEXT: vmov.s16 r2, q0[7]
1806 ; CHECK-NEXT: vmov.s16 r3, q0[6]
1807 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
1808 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1809 ; CHECK-NEXT: asrs r2, r2, #31
1810 ; CHECK-NEXT: asrs r3, r3, #31
1811 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1812 ; CHECK-NEXT: vpsel q0, q0, q1
1813 ; CHECK-NEXT: vmov r2, r3, d0
1814 ; CHECK-NEXT: adds.w lr, lr, r2
1815 ; CHECK-NEXT: adc.w r12, r12, r3
1816 ; CHECK-NEXT: vmov r2, r3, d1
1817 ; CHECK-NEXT: adds.w r2, r2, lr
1818 ; CHECK-NEXT: adc.w r3, r3, r12
1819 ; CHECK-NEXT: adds r0, r0, r2
1820 ; CHECK-NEXT: adcs r1, r3
1821 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
1822 ; CHECK-NEXT: pop {r7, pc}
1824 %c = icmp eq <8 x i16> %b, zeroinitializer
1825 %xx = sext <8 x i16> %x to <8 x i64>
1826 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1827 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
; Predicated add reduction, <2 x i16> zero-extended to <2 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0 (select against zeroinitializer).
; The expected assembly builds the predicate in a GPR (csetm + bfi), moves it
; to p0 with vmsr, selects with vpsel and combines the two 64-bit lanes with
; scalar add/orr before the final adds/adcs into r0/r1.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1832 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1833 ; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
1834 ; CHECK: @ %bb.0: @ %entry
1835 ; CHECK-NEXT: .save {r7, lr}
1836 ; CHECK-NEXT: push {r7, lr}
1837 ; CHECK-NEXT: vmov.i64 q2, #0xffff
1838 ; CHECK-NEXT: movs r3, #0
1839 ; CHECK-NEXT: vand q1, q1, q2
1840 ; CHECK-NEXT: vand q0, q0, q2
1841 ; CHECK-NEXT: vmov r2, s4
1842 ; CHECK-NEXT: cmp r2, #0
1843 ; CHECK-NEXT: csetm r2, eq
1844 ; CHECK-NEXT: bfi r3, r2, #0, #8
1845 ; CHECK-NEXT: vmov r2, s6
1846 ; CHECK-NEXT: vmov.i32 q1, #0x0
1847 ; CHECK-NEXT: cmp r2, #0
1848 ; CHECK-NEXT: csetm r2, eq
1849 ; CHECK-NEXT: bfi r3, r2, #8, #8
1850 ; CHECK-NEXT: vmsr p0, r3
1851 ; CHECK-NEXT: vpsel q0, q0, q1
1852 ; CHECK-NEXT: vmov r12, lr, d1
1853 ; CHECK-NEXT: vmov r2, r3, d0
1854 ; CHECK-NEXT: add r2, r12
1855 ; CHECK-NEXT: orr.w r3, r3, lr
1856 ; CHECK-NEXT: adds r0, r0, r2
1857 ; CHECK-NEXT: adcs r1, r3
1858 ; CHECK-NEXT: pop {r7, pc}
1860 %c = icmp eq <2 x i16> %b, zeroinitializer
1861 %xx = zext <2 x i16> %x to <2 x i64>
1862 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1863 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <2 x i16> sign-extended to <2 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly sign-extends each element in GPRs (sxth, then
; asrs #31 for the high word), rebuilds the vector, selects with vpsel under
; a predicate assembled via csetm/bfi/vmsr, and sums with adds/adc pairs.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1868 define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1869 ; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
1870 ; CHECK: @ %bb.0: @ %entry
1871 ; CHECK-NEXT: .save {r7, lr}
1872 ; CHECK-NEXT: push {r7, lr}
1873 ; CHECK-NEXT: vmov.i32 q2, #0xffff
1874 ; CHECK-NEXT: movs r3, #0
1875 ; CHECK-NEXT: vand q1, q1, q2
1876 ; CHECK-NEXT: vmov r2, s4
1877 ; CHECK-NEXT: cmp r2, #0
1878 ; CHECK-NEXT: csetm r2, eq
1879 ; CHECK-NEXT: bfi r3, r2, #0, #8
1880 ; CHECK-NEXT: vmov r2, s6
1881 ; CHECK-NEXT: vmov.i32 q1, #0x0
1882 ; CHECK-NEXT: cmp r2, #0
1883 ; CHECK-NEXT: csetm r2, eq
1884 ; CHECK-NEXT: bfi r3, r2, #8, #8
1885 ; CHECK-NEXT: vmov r2, s2
1886 ; CHECK-NEXT: vmsr p0, r3
1887 ; CHECK-NEXT: vmov r3, s0
1888 ; CHECK-NEXT: sxth r2, r2
1889 ; CHECK-NEXT: sxth r3, r3
1890 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
1891 ; CHECK-NEXT: asrs r2, r2, #31
1892 ; CHECK-NEXT: asrs r3, r3, #31
1893 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
1894 ; CHECK-NEXT: vpsel q0, q0, q1
1895 ; CHECK-NEXT: vmov lr, r12, d1
1896 ; CHECK-NEXT: vmov r3, r2, d0
1897 ; CHECK-NEXT: adds.w r3, r3, lr
1898 ; CHECK-NEXT: adc.w r2, r2, r12
1899 ; CHECK-NEXT: adds r0, r0, r3
1900 ; CHECK-NEXT: adcs r1, r2
1901 ; CHECK-NEXT: pop {r7, pc}
1903 %c = icmp eq <2 x i16> %b, zeroinitializer
1904 %xx = sext <2 x i16> %x to <2 x i64>
1905 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1906 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <16 x i8> zero-extended to <16 x i32>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a single vpt.i8 block containing vaddvat.u8, which
; accumulates into r0.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1911 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1912 ; CHECK-LABEL: add_v16i8_v16i32_acc_zext:
1913 ; CHECK: @ %bb.0: @ %entry
1914 ; CHECK-NEXT: vpt.i8 eq, q1, zr
1915 ; CHECK-NEXT: vaddvat.u8 r0, q0
1918 %c = icmp eq <16 x i8> %b, zeroinitializer
1919 %xx = zext <16 x i8> %x to <16 x i32>
1920 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
1921 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Predicated add reduction, <16 x i8> sign-extended to <16 x i32>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a single vpt.i8 block containing vaddvat.s8 (the
; signed form), which accumulates into r0.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1926 define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1927 ; CHECK-LABEL: add_v16i8_v16i32_acc_sext:
1928 ; CHECK: @ %bb.0: @ %entry
1929 ; CHECK-NEXT: vpt.i8 eq, q1, zr
1930 ; CHECK-NEXT: vaddvat.s8 r0, q0
1933 %c = icmp eq <16 x i8> %b, zeroinitializer
1934 %xx = sext <16 x i8> %x to <16 x i32>
1935 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
1936 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
; Predicated add reduction, <4 x i8> zero-extended to <4 x i32>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly masks both operands with 0xff per 32-bit lane (vand)
; before the vpt.i32 compare and the predicated vaddvat.u32 into r0.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1941 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
1942 ; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
1943 ; CHECK: @ %bb.0: @ %entry
1944 ; CHECK-NEXT: vmov.i32 q2, #0xff
1945 ; CHECK-NEXT: vand q1, q1, q2
1946 ; CHECK-NEXT: vand q0, q0, q2
1947 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1948 ; CHECK-NEXT: vaddvat.u32 r0, q0
1951 %c = icmp eq <4 x i8> %b, zeroinitializer
1952 %xx = zext <4 x i8> %x to <4 x i32>
1953 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1954 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction, <4 x i8> sign-extended to <4 x i32>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly sign-extends %x in two steps (vmovlb.s8 then
; vmovlb.s16), masks %b with 0xff for the compare, and accumulates into r0
; with a vpt-predicated vaddvat.u32.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1959 define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
1960 ; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
1961 ; CHECK: @ %bb.0: @ %entry
1962 ; CHECK-NEXT: vmov.i32 q2, #0xff
1963 ; CHECK-NEXT: vmovlb.s8 q0, q0
1964 ; CHECK-NEXT: vand q1, q1, q2
1965 ; CHECK-NEXT: vmovlb.s16 q0, q0
1966 ; CHECK-NEXT: vpt.i32 eq, q1, zr
1967 ; CHECK-NEXT: vaddvat.u32 r0, q0
1970 %c = icmp eq <4 x i8> %b, zeroinitializer
1971 %xx = sext <4 x i8> %x to <4 x i32>
1972 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1973 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
; Predicated add reduction, <16 x i8> zero-extended to <16 x i16>, with a
; zeroext i16 return value.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a vpt-predicated vaddvat.u8 into r0 followed by
; uxth to produce the zero-extended 16-bit result.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1978 define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
1979 ; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
1980 ; CHECK: @ %bb.0: @ %entry
1981 ; CHECK-NEXT: vpt.i8 eq, q1, zr
1982 ; CHECK-NEXT: vaddvat.u8 r0, q0
1983 ; CHECK-NEXT: uxth r0, r0
1986 %c = icmp eq <16 x i8> %b, zeroinitializer
1987 %xx = zext <16 x i8> %x to <16 x i16>
1988 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
1989 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Predicated add reduction, <16 x i8> sign-extended to <16 x i16>, with a
; signext i16 return value.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a vpt-predicated vaddvat.s8 into r0 followed by
; sxth to produce the sign-extended 16-bit result.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
1994 define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
1995 ; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
1996 ; CHECK: @ %bb.0: @ %entry
1997 ; CHECK-NEXT: vpt.i8 eq, q1, zr
1998 ; CHECK-NEXT: vaddvat.s8 r0, q0
1999 ; CHECK-NEXT: sxth r0, r0
2002 %c = icmp eq <16 x i8> %b, zeroinitializer
2003 %xx = sext <16 x i8> %x to <16 x i16>
2004 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
2005 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
; Predicated add reduction, <8 x i8> zero-extended to <8 x i16>, with a
; zeroext i16 return value.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly widens both operands with vmovlb.u8, then uses a
; vpt.i16-predicated vaddvat.u16 into r0 and a final uxth.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2010 define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2011 ; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
2012 ; CHECK: @ %bb.0: @ %entry
2013 ; CHECK-NEXT: vmovlb.u8 q1, q1
2014 ; CHECK-NEXT: vmovlb.u8 q0, q0
2015 ; CHECK-NEXT: vpt.i16 eq, q1, zr
2016 ; CHECK-NEXT: vaddvat.u16 r0, q0
2017 ; CHECK-NEXT: uxth r0, r0
2020 %c = icmp eq <8 x i8> %b, zeroinitializer
2021 %xx = zext <8 x i8> %x to <8 x i16>
2022 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2023 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated add reduction, <8 x i8> sign-extended to <8 x i16>, with a
; signext i16 return value.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly zero-extends %b (vmovlb.u8) for the compare,
; sign-extends %x (vmovlb.s8), then uses a vpt.i16-predicated vaddvat.u16
; into r0 and a final sxth.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2028 define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2029 ; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
2030 ; CHECK: @ %bb.0: @ %entry
2031 ; CHECK-NEXT: vmovlb.u8 q1, q1
2032 ; CHECK-NEXT: vmovlb.s8 q0, q0
2033 ; CHECK-NEXT: vpt.i16 eq, q1, zr
2034 ; CHECK-NEXT: vaddvat.u16 r0, q0
2035 ; CHECK-NEXT: sxth r0, r0
2038 %c = icmp eq <8 x i8> %b, zeroinitializer
2039 %xx = sext <8 x i8> %x to <8 x i16>
2040 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2041 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
; Predicated add reduction over <16 x i8> with no widening and a zeroext i8
; return value.
; Lanes of %x whose matching %b lane equals zero are summed; all other lanes
; contribute 0.
; The expected assembly is a vpt-predicated vaddvat.u8 into r0 followed by
; uxtb to produce the zero-extended 8-bit result.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2046 define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %b, i8 %a) {
2047 ; CHECK-LABEL: add_v16i8_v16i8_acc:
2048 ; CHECK: @ %bb.0: @ %entry
2049 ; CHECK-NEXT: vpt.i8 eq, q1, zr
2050 ; CHECK-NEXT: vaddvat.u8 r0, q0
2051 ; CHECK-NEXT: uxtb r0, r0
2054 %c = icmp eq <16 x i8> %b, zeroinitializer
2055 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
2056 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
; Predicated add reduction, <16 x i8> zero-extended to <16 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a long expanded sequence rather than a single
; reduction instruction: the i8 compare result is materialised with vpsel and
; re-widened step by step (i8 -> i16 -> i32 lanes) to rebuild predicates, the
; source bytes are extracted two at a time and masked with 0xff per 64-bit
; lane, and each selected pair is folded into a running r12/lr sum with
; adds/adc before the final adds/adcs into r0/r1. One q register is spilled
; to a 16-byte stack slot and reloaded later.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2061 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2062 ; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
2063 ; CHECK: @ %bb.0: @ %entry
2064 ; CHECK-NEXT: .save {r7, lr}
2065 ; CHECK-NEXT: push {r7, lr}
2066 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
2067 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
2068 ; CHECK-NEXT: .pad #16
2069 ; CHECK-NEXT: sub sp, #16
2070 ; CHECK-NEXT: vmov q2, q0
2071 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
2072 ; CHECK-NEXT: vmov.i8 q0, #0x0
2073 ; CHECK-NEXT: vmov.i8 q1, #0xff
2074 ; CHECK-NEXT: vpsel q5, q1, q0
2075 ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
2076 ; CHECK-NEXT: vmov.u8 r2, q5[0]
2077 ; CHECK-NEXT: vmov.16 q3[0], r2
2078 ; CHECK-NEXT: vmov.u8 r2, q5[1]
2079 ; CHECK-NEXT: vmov.16 q3[1], r2
2080 ; CHECK-NEXT: vmov.u8 r2, q5[2]
2081 ; CHECK-NEXT: vmov.16 q3[2], r2
2082 ; CHECK-NEXT: vmov.u8 r2, q5[3]
2083 ; CHECK-NEXT: vmov.16 q3[3], r2
2084 ; CHECK-NEXT: vmov.u8 r2, q5[4]
2085 ; CHECK-NEXT: vmov.16 q3[4], r2
2086 ; CHECK-NEXT: vmov.u8 r2, q5[5]
2087 ; CHECK-NEXT: vmov.16 q3[5], r2
2088 ; CHECK-NEXT: vmov.u8 r2, q5[6]
2089 ; CHECK-NEXT: vmov.16 q3[6], r2
2090 ; CHECK-NEXT: vmov.u8 r2, q5[7]
2091 ; CHECK-NEXT: vmov.16 q3[7], r2
2092 ; CHECK-NEXT: vcmp.i16 ne, q3, zr
2093 ; CHECK-NEXT: vpsel q6, q1, q0
2094 ; CHECK-NEXT: vmov.u16 r2, q6[2]
2095 ; CHECK-NEXT: vmov.u16 r3, q6[0]
2096 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
2097 ; CHECK-NEXT: vmov.u16 r2, q6[3]
2098 ; CHECK-NEXT: vmov.u16 r3, q6[1]
2099 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
2100 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2101 ; CHECK-NEXT: vpsel q7, q1, q0
2102 ; CHECK-NEXT: vmov r2, r3, d14
2103 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
2104 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
2105 ; CHECK-NEXT: vmov.u8 r2, q2[1]
2106 ; CHECK-NEXT: vmov.u8 r3, q2[0]
2107 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2108 ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
2109 ; CHECK-NEXT: vmov.i64 q3, #0xff
2110 ; CHECK-NEXT: vand q0, q4, q3
2111 ; CHECK-NEXT: vmov.i32 q4, #0x0
2112 ; CHECK-NEXT: vpsel q0, q0, q4
2113 ; CHECK-NEXT: vmov r12, lr, d1
2114 ; CHECK-NEXT: vmov r2, r3, d0
2115 ; CHECK-NEXT: orr.w lr, lr, r3
2116 ; CHECK-NEXT: add r12, r2
2117 ; CHECK-NEXT: vmov r3, r2, d15
2118 ; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload
2119 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2120 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2121 ; CHECK-NEXT: vmov.u8 r2, q2[3]
2122 ; CHECK-NEXT: vmov.u8 r3, q2[2]
2123 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2124 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2125 ; CHECK-NEXT: vand q0, q0, q3
2126 ; CHECK-NEXT: vpsel q0, q0, q4
2127 ; CHECK-NEXT: vmov r2, r3, d0
2128 ; CHECK-NEXT: adds.w r12, r12, r2
2129 ; CHECK-NEXT: adc.w lr, lr, r3
2130 ; CHECK-NEXT: vmov r2, r3, d1
2131 ; CHECK-NEXT: adds.w r12, r12, r2
2132 ; CHECK-NEXT: vmov.u16 r2, q6[6]
2133 ; CHECK-NEXT: adc.w lr, lr, r3
2134 ; CHECK-NEXT: vmov.u16 r3, q6[4]
2135 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2136 ; CHECK-NEXT: vmov.u16 r2, q6[7]
2137 ; CHECK-NEXT: vmov.u16 r3, q6[5]
2138 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2139 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2140 ; CHECK-NEXT: vpsel q6, q1, q7
2141 ; CHECK-NEXT: vmov r2, r3, d12
2142 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2143 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2144 ; CHECK-NEXT: vmov.u8 r2, q2[5]
2145 ; CHECK-NEXT: vmov.u8 r3, q2[4]
2146 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2147 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2148 ; CHECK-NEXT: vand q0, q0, q3
2149 ; CHECK-NEXT: vpsel q0, q0, q4
2150 ; CHECK-NEXT: vmov r2, r3, d0
2151 ; CHECK-NEXT: adds.w r12, r12, r2
2152 ; CHECK-NEXT: adc.w lr, lr, r3
2153 ; CHECK-NEXT: vmov r2, r3, d1
2154 ; CHECK-NEXT: adds.w r12, r12, r2
2155 ; CHECK-NEXT: adc.w lr, lr, r3
2156 ; CHECK-NEXT: vmov r2, r3, d13
2157 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2158 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2159 ; CHECK-NEXT: vmov.u8 r2, q2[7]
2160 ; CHECK-NEXT: vmov.u8 r3, q2[6]
2161 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2162 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2163 ; CHECK-NEXT: vand q0, q0, q3
2164 ; CHECK-NEXT: vpsel q0, q0, q4
2165 ; CHECK-NEXT: vmov r2, r3, d0
2166 ; CHECK-NEXT: adds.w r12, r12, r2
2167 ; CHECK-NEXT: adc.w lr, lr, r3
2168 ; CHECK-NEXT: vmov r2, r3, d1
2169 ; CHECK-NEXT: adds.w r12, r12, r2
2170 ; CHECK-NEXT: vmov.u8 r2, q5[8]
2171 ; CHECK-NEXT: vmov.16 q6[0], r2
2172 ; CHECK-NEXT: vmov.u8 r2, q5[9]
2173 ; CHECK-NEXT: vmov.16 q6[1], r2
2174 ; CHECK-NEXT: vmov.u8 r2, q5[10]
2175 ; CHECK-NEXT: vmov.16 q6[2], r2
2176 ; CHECK-NEXT: vmov.u8 r2, q5[11]
2177 ; CHECK-NEXT: vmov.16 q6[3], r2
2178 ; CHECK-NEXT: vmov.u8 r2, q5[12]
2179 ; CHECK-NEXT: vmov.16 q6[4], r2
2180 ; CHECK-NEXT: vmov.u8 r2, q5[13]
2181 ; CHECK-NEXT: vmov.16 q6[5], r2
2182 ; CHECK-NEXT: vmov.u8 r2, q5[14]
2183 ; CHECK-NEXT: vmov.16 q6[6], r2
2184 ; CHECK-NEXT: vmov.u8 r2, q5[15]
2185 ; CHECK-NEXT: vmov.16 q6[7], r2
2186 ; CHECK-NEXT: adc.w lr, lr, r3
2187 ; CHECK-NEXT: vcmp.i16 ne, q6, zr
2188 ; CHECK-NEXT: vpsel q5, q1, q7
2189 ; CHECK-NEXT: vmov.u16 r2, q5[2]
2190 ; CHECK-NEXT: vmov.u16 r3, q5[0]
2191 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2192 ; CHECK-NEXT: vmov.u16 r2, q5[3]
2193 ; CHECK-NEXT: vmov.u16 r3, q5[1]
2194 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2195 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2196 ; CHECK-NEXT: vpsel q6, q1, q7
2197 ; CHECK-NEXT: vmov r2, r3, d12
2198 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2199 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2200 ; CHECK-NEXT: vmov.u8 r2, q2[9]
2201 ; CHECK-NEXT: vmov.u8 r3, q2[8]
2202 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2203 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2204 ; CHECK-NEXT: vand q0, q0, q3
2205 ; CHECK-NEXT: vpsel q0, q0, q4
2206 ; CHECK-NEXT: vmov r2, r3, d0
2207 ; CHECK-NEXT: adds.w r12, r12, r2
2208 ; CHECK-NEXT: adc.w lr, lr, r3
2209 ; CHECK-NEXT: vmov r2, r3, d1
2210 ; CHECK-NEXT: adds.w r12, r12, r2
2211 ; CHECK-NEXT: adc.w lr, lr, r3
2212 ; CHECK-NEXT: vmov r2, r3, d13
2213 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2214 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2215 ; CHECK-NEXT: vmov.u8 r2, q2[11]
2216 ; CHECK-NEXT: vmov.u8 r3, q2[10]
2217 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2218 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2219 ; CHECK-NEXT: vand q0, q0, q3
2220 ; CHECK-NEXT: vpsel q0, q0, q4
2221 ; CHECK-NEXT: vmov r2, r3, d0
2222 ; CHECK-NEXT: adds.w r12, r12, r2
2223 ; CHECK-NEXT: adc.w lr, lr, r3
2224 ; CHECK-NEXT: vmov r2, r3, d1
2225 ; CHECK-NEXT: adds.w r12, r12, r2
2226 ; CHECK-NEXT: vmov.u16 r2, q5[6]
2227 ; CHECK-NEXT: adc.w lr, lr, r3
2228 ; CHECK-NEXT: vmov.u16 r3, q5[4]
2229 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2230 ; CHECK-NEXT: vmov.u16 r2, q5[7]
2231 ; CHECK-NEXT: vmov.u16 r3, q5[5]
2232 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2233 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2234 ; CHECK-NEXT: vpsel q1, q1, q7
2235 ; CHECK-NEXT: vmov r2, r3, d2
2236 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2237 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2238 ; CHECK-NEXT: vmov.u8 r2, q2[13]
2239 ; CHECK-NEXT: vmov.u8 r3, q2[12]
2240 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2241 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2242 ; CHECK-NEXT: vand q0, q0, q3
2243 ; CHECK-NEXT: vpsel q0, q0, q4
2244 ; CHECK-NEXT: vmov r2, r3, d0
2245 ; CHECK-NEXT: adds.w r12, r12, r2
2246 ; CHECK-NEXT: adc.w lr, lr, r3
2247 ; CHECK-NEXT: vmov r2, r3, d1
2248 ; CHECK-NEXT: adds.w r12, r12, r2
2249 ; CHECK-NEXT: adc.w lr, lr, r3
2250 ; CHECK-NEXT: vmov r2, r3, d3
2251 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
2252 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
2253 ; CHECK-NEXT: vmov.u8 r2, q2[15]
2254 ; CHECK-NEXT: vmov.u8 r3, q2[14]
2255 ; CHECK-NEXT: vcmp.i32 ne, q0, zr
2256 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2257 ; CHECK-NEXT: vand q0, q0, q3
2258 ; CHECK-NEXT: vpsel q0, q0, q4
2259 ; CHECK-NEXT: vmov r2, r3, d0
2260 ; CHECK-NEXT: adds.w r12, r12, r2
2261 ; CHECK-NEXT: adc.w lr, lr, r3
2262 ; CHECK-NEXT: vmov r2, r3, d1
2263 ; CHECK-NEXT: adds.w r2, r2, r12
2264 ; CHECK-NEXT: adc.w r3, r3, lr
2265 ; CHECK-NEXT: adds r0, r0, r2
2266 ; CHECK-NEXT: adcs r1, r3
2267 ; CHECK-NEXT: add sp, #16
2268 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
2269 ; CHECK-NEXT: pop {r7, pc}
2271 %c = icmp eq <16 x i8> %b, zeroinitializer
2272 %xx = zext <16 x i8> %x to <16 x i64>
2273 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2274 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction, <16 x i8> sign-extended to <16 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly is a long expanded sequence rather than a single
; reduction instruction: the i8 compare result is materialised with vpsel and
; re-widened step by step (i8 -> i16 -> i32 lanes) to rebuild predicates, the
; source bytes are extracted two at a time with vmov.s8, their high words are
; formed with asrs #31, and each selected pair is folded into a running
; lr/r12 sum with adds/adc before the final adds/adcs into r0/r1.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2279 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2280 ; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
2281 ; CHECK: @ %bb.0: @ %entry
2282 ; CHECK-NEXT: .save {r7, lr}
2283 ; CHECK-NEXT: push {r7, lr}
2284 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
2285 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
2286 ; CHECK-NEXT: vcmp.i8 eq, q1, zr
2287 ; CHECK-NEXT: vmov.i8 q1, #0x0
2288 ; CHECK-NEXT: vmov.i8 q2, #0xff
2289 ; CHECK-NEXT: vpsel q4, q2, q1
2290 ; CHECK-NEXT: vmov.u8 r2, q4[0]
2291 ; CHECK-NEXT: vmov.16 q3[0], r2
2292 ; CHECK-NEXT: vmov.u8 r2, q4[1]
2293 ; CHECK-NEXT: vmov.16 q3[1], r2
2294 ; CHECK-NEXT: vmov.u8 r2, q4[2]
2295 ; CHECK-NEXT: vmov.16 q3[2], r2
2296 ; CHECK-NEXT: vmov.u8 r2, q4[3]
2297 ; CHECK-NEXT: vmov.16 q3[3], r2
2298 ; CHECK-NEXT: vmov.u8 r2, q4[4]
2299 ; CHECK-NEXT: vmov.16 q3[4], r2
2300 ; CHECK-NEXT: vmov.u8 r2, q4[5]
2301 ; CHECK-NEXT: vmov.16 q3[5], r2
2302 ; CHECK-NEXT: vmov.u8 r2, q4[6]
2303 ; CHECK-NEXT: vmov.16 q3[6], r2
2304 ; CHECK-NEXT: vmov.u8 r2, q4[7]
2305 ; CHECK-NEXT: vmov.16 q3[7], r2
2306 ; CHECK-NEXT: vcmp.i16 ne, q3, zr
2307 ; CHECK-NEXT: vpsel q5, q2, q1
2308 ; CHECK-NEXT: vmov.u16 r2, q5[2]
2309 ; CHECK-NEXT: vmov.u16 r3, q5[0]
2310 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
2311 ; CHECK-NEXT: vmov.u16 r2, q5[3]
2312 ; CHECK-NEXT: vmov.u16 r3, q5[1]
2313 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
2314 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2315 ; CHECK-NEXT: vpsel q6, q2, q1
2316 ; CHECK-NEXT: vmov r2, r3, d12
2317 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
2318 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
2319 ; CHECK-NEXT: vmov.s8 r2, q0[1]
2320 ; CHECK-NEXT: vmov.s8 r3, q0[0]
2321 ; CHECK-NEXT: vcmp.i32 ne, q3, zr
2322 ; CHECK-NEXT: vmov q7[2], q7[0], r3, r2
2323 ; CHECK-NEXT: asrs r2, r2, #31
2324 ; CHECK-NEXT: asrs r3, r3, #31
2325 ; CHECK-NEXT: vmov.i32 q3, #0x0
2326 ; CHECK-NEXT: vmov q7[3], q7[1], r3, r2
2327 ; CHECK-NEXT: vpsel q7, q7, q3
2328 ; CHECK-NEXT: vmov lr, r12, d15
2329 ; CHECK-NEXT: vmov r3, r2, d14
2330 ; CHECK-NEXT: adds.w lr, lr, r3
2331 ; CHECK-NEXT: adc.w r12, r12, r2
2332 ; CHECK-NEXT: vmov r2, r3, d13
2333 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
2334 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
2335 ; CHECK-NEXT: vmov.s8 r2, q0[3]
2336 ; CHECK-NEXT: vmov.s8 r3, q0[2]
2337 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
2338 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
2339 ; CHECK-NEXT: asrs r2, r2, #31
2340 ; CHECK-NEXT: asrs r3, r3, #31
2341 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
2342 ; CHECK-NEXT: vpsel q6, q6, q3
2343 ; CHECK-NEXT: vmov r2, r3, d12
2344 ; CHECK-NEXT: adds.w lr, lr, r2
2345 ; CHECK-NEXT: adc.w r12, r12, r3
2346 ; CHECK-NEXT: vmov r2, r3, d13
2347 ; CHECK-NEXT: adds.w lr, lr, r2
2348 ; CHECK-NEXT: vmov.u16 r2, q5[6]
2349 ; CHECK-NEXT: adc.w r12, r12, r3
2350 ; CHECK-NEXT: vmov.u16 r3, q5[4]
2351 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
2352 ; CHECK-NEXT: vmov.u16 r2, q5[7]
2353 ; CHECK-NEXT: vmov.u16 r3, q5[5]
2354 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
2355 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
2356 ; CHECK-NEXT: vpsel q5, q2, q1
2357 ; CHECK-NEXT: vmov r2, r3, d10
2358 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
2359 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
2360 ; CHECK-NEXT: vmov.s8 r2, q0[5]
2361 ; CHECK-NEXT: vmov.s8 r3, q0[4]
2362 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
2363 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
2364 ; CHECK-NEXT: asrs r2, r2, #31
2365 ; CHECK-NEXT: asrs r3, r3, #31
2366 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
2367 ; CHECK-NEXT: vpsel q6, q6, q3
2368 ; CHECK-NEXT: vmov r2, r3, d12
2369 ; CHECK-NEXT: adds.w lr, lr, r2
2370 ; CHECK-NEXT: adc.w r12, r12, r3
2371 ; CHECK-NEXT: vmov r2, r3, d13
2372 ; CHECK-NEXT: adds.w lr, lr, r2
2373 ; CHECK-NEXT: adc.w r12, r12, r3
2374 ; CHECK-NEXT: vmov r2, r3, d11
2375 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
2376 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
2377 ; CHECK-NEXT: vmov.s8 r2, q0[7]
2378 ; CHECK-NEXT: vmov.s8 r3, q0[6]
2379 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2380 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
2381 ; CHECK-NEXT: asrs r2, r2, #31
2382 ; CHECK-NEXT: asrs r3, r3, #31
2383 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
2384 ; CHECK-NEXT: vpsel q5, q5, q3
2385 ; CHECK-NEXT: vmov r2, r3, d10
2386 ; CHECK-NEXT: adds.w lr, lr, r2
2387 ; CHECK-NEXT: adc.w r12, r12, r3
2388 ; CHECK-NEXT: vmov r2, r3, d11
2389 ; CHECK-NEXT: adds.w lr, lr, r2
2390 ; CHECK-NEXT: vmov.u8 r2, q4[8]
2391 ; CHECK-NEXT: vmov.16 q5[0], r2
2392 ; CHECK-NEXT: vmov.u8 r2, q4[9]
2393 ; CHECK-NEXT: vmov.16 q5[1], r2
2394 ; CHECK-NEXT: vmov.u8 r2, q4[10]
2395 ; CHECK-NEXT: vmov.16 q5[2], r2
2396 ; CHECK-NEXT: vmov.u8 r2, q4[11]
2397 ; CHECK-NEXT: vmov.16 q5[3], r2
2398 ; CHECK-NEXT: vmov.u8 r2, q4[12]
2399 ; CHECK-NEXT: vmov.16 q5[4], r2
2400 ; CHECK-NEXT: vmov.u8 r2, q4[13]
2401 ; CHECK-NEXT: vmov.16 q5[5], r2
2402 ; CHECK-NEXT: vmov.u8 r2, q4[14]
2403 ; CHECK-NEXT: vmov.16 q5[6], r2
2404 ; CHECK-NEXT: vmov.u8 r2, q4[15]
2405 ; CHECK-NEXT: vmov.16 q5[7], r2
2406 ; CHECK-NEXT: adc.w r12, r12, r3
2407 ; CHECK-NEXT: vcmp.i16 ne, q5, zr
2408 ; CHECK-NEXT: vpsel q4, q2, q1
2409 ; CHECK-NEXT: vmov.u16 r2, q4[2]
2410 ; CHECK-NEXT: vmov.u16 r3, q4[0]
2411 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
2412 ; CHECK-NEXT: vmov.u16 r2, q4[3]
2413 ; CHECK-NEXT: vmov.u16 r3, q4[1]
2414 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
2415 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2416 ; CHECK-NEXT: vpsel q5, q2, q1
2417 ; CHECK-NEXT: vmov r2, r3, d10
2418 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
2419 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
2420 ; CHECK-NEXT: vmov.s8 r2, q0[9]
2421 ; CHECK-NEXT: vmov.s8 r3, q0[8]
2422 ; CHECK-NEXT: vcmp.i32 ne, q6, zr
2423 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
2424 ; CHECK-NEXT: asrs r2, r2, #31
2425 ; CHECK-NEXT: asrs r3, r3, #31
2426 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
2427 ; CHECK-NEXT: vpsel q6, q6, q3
2428 ; CHECK-NEXT: vmov r2, r3, d12
2429 ; CHECK-NEXT: adds.w lr, lr, r2
2430 ; CHECK-NEXT: adc.w r12, r12, r3
2431 ; CHECK-NEXT: vmov r2, r3, d13
2432 ; CHECK-NEXT: adds.w lr, lr, r2
2433 ; CHECK-NEXT: adc.w r12, r12, r3
2434 ; CHECK-NEXT: vmov r2, r3, d11
2435 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
2436 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
2437 ; CHECK-NEXT: vmov.s8 r2, q0[11]
2438 ; CHECK-NEXT: vmov.s8 r3, q0[10]
2439 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2440 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
2441 ; CHECK-NEXT: asrs r2, r2, #31
2442 ; CHECK-NEXT: asrs r3, r3, #31
2443 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
2444 ; CHECK-NEXT: vpsel q5, q5, q3
2445 ; CHECK-NEXT: vmov r2, r3, d10
2446 ; CHECK-NEXT: adds.w lr, lr, r2
2447 ; CHECK-NEXT: adc.w r12, r12, r3
2448 ; CHECK-NEXT: vmov r2, r3, d11
2449 ; CHECK-NEXT: adds.w lr, lr, r2
2450 ; CHECK-NEXT: vmov.u16 r2, q4[6]
2451 ; CHECK-NEXT: adc.w r12, r12, r3
2452 ; CHECK-NEXT: vmov.u16 r3, q4[4]
2453 ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
2454 ; CHECK-NEXT: vmov.u16 r2, q4[7]
2455 ; CHECK-NEXT: vmov.u16 r3, q4[5]
2456 ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
2457 ; CHECK-NEXT: vcmp.i32 ne, q5, zr
2458 ; CHECK-NEXT: vpsel q1, q2, q1
2459 ; CHECK-NEXT: vmov r2, r3, d2
2460 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
2461 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
2462 ; CHECK-NEXT: vmov.s8 r2, q0[13]
2463 ; CHECK-NEXT: vmov.s8 r3, q0[12]
2464 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
2465 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
2466 ; CHECK-NEXT: asrs r2, r2, #31
2467 ; CHECK-NEXT: asrs r3, r3, #31
2468 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
2469 ; CHECK-NEXT: vpsel q2, q2, q3
2470 ; CHECK-NEXT: vmov r2, r3, d4
2471 ; CHECK-NEXT: adds.w lr, lr, r2
2472 ; CHECK-NEXT: adc.w r12, r12, r3
2473 ; CHECK-NEXT: vmov r2, r3, d5
2474 ; CHECK-NEXT: adds.w lr, lr, r2
2475 ; CHECK-NEXT: adc.w r12, r12, r3
2476 ; CHECK-NEXT: vmov r2, r3, d3
2477 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
2478 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
2479 ; CHECK-NEXT: vmov.s8 r2, q0[15]
2480 ; CHECK-NEXT: vmov.s8 r3, q0[14]
2481 ; CHECK-NEXT: vcmp.i32 ne, q1, zr
2482 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2483 ; CHECK-NEXT: asrs r2, r2, #31
2484 ; CHECK-NEXT: asrs r3, r3, #31
2485 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2486 ; CHECK-NEXT: vpsel q0, q0, q3
2487 ; CHECK-NEXT: vmov r2, r3, d0
2488 ; CHECK-NEXT: adds.w lr, lr, r2
2489 ; CHECK-NEXT: adc.w r12, r12, r3
2490 ; CHECK-NEXT: vmov r2, r3, d1
2491 ; CHECK-NEXT: adds.w r2, r2, lr
2492 ; CHECK-NEXT: adc.w r3, r3, r12
2493 ; CHECK-NEXT: adds r0, r0, r2
2494 ; CHECK-NEXT: adcs r1, r3
2495 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
2496 ; CHECK-NEXT: pop {r7, pc}
2498 %c = icmp eq <16 x i8> %b, zeroinitializer
2499 %xx = sext <16 x i8> %x to <16 x i64>
2500 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2501 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
; Predicated add reduction, <2 x i8> zero-extended to <2 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly masks both operands with 0xff per 64-bit lane,
; builds the predicate in a GPR (csetm + bfi), moves it to p0 with vmsr,
; selects with vpsel and combines the two lanes with scalar add/orr before
; the final adds/adcs into r0/r1.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2506 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2507 ; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
2508 ; CHECK: @ %bb.0: @ %entry
2509 ; CHECK-NEXT: .save {r7, lr}
2510 ; CHECK-NEXT: push {r7, lr}
2511 ; CHECK-NEXT: vmov.i64 q2, #0xff
2512 ; CHECK-NEXT: movs r3, #0
2513 ; CHECK-NEXT: vand q1, q1, q2
2514 ; CHECK-NEXT: vand q0, q0, q2
2515 ; CHECK-NEXT: vmov r2, s4
2516 ; CHECK-NEXT: cmp r2, #0
2517 ; CHECK-NEXT: csetm r2, eq
2518 ; CHECK-NEXT: bfi r3, r2, #0, #8
2519 ; CHECK-NEXT: vmov r2, s6
2520 ; CHECK-NEXT: vmov.i32 q1, #0x0
2521 ; CHECK-NEXT: cmp r2, #0
2522 ; CHECK-NEXT: csetm r2, eq
2523 ; CHECK-NEXT: bfi r3, r2, #8, #8
2524 ; CHECK-NEXT: vmsr p0, r3
2525 ; CHECK-NEXT: vpsel q0, q0, q1
2526 ; CHECK-NEXT: vmov r12, lr, d1
2527 ; CHECK-NEXT: vmov r2, r3, d0
2528 ; CHECK-NEXT: add r2, r12
2529 ; CHECK-NEXT: orr.w r3, r3, lr
2530 ; CHECK-NEXT: adds r0, r0, r2
2531 ; CHECK-NEXT: adcs r1, r3
2532 ; CHECK-NEXT: pop {r7, pc}
2534 %c = icmp eq <2 x i8> %b, zeroinitializer
2535 %xx = zext <2 x i8> %x to <2 x i64>
2536 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2537 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction, <2 x i8> sign-extended to <2 x i64>.
; Lanes of %x whose matching %b lane equals zero are widened and summed; all
; other lanes contribute 0.
; The expected assembly sign-extends each element in GPRs (sxtb, then
; asrs #31 for the high word), rebuilds the vector, selects with vpsel under
; a predicate assembled via csetm/bfi/vmsr, and sums with adds/adc pairs.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2542 define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2543 ; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
2544 ; CHECK: @ %bb.0: @ %entry
2545 ; CHECK-NEXT: .save {r7, lr}
2546 ; CHECK-NEXT: push {r7, lr}
2547 ; CHECK-NEXT: vmov.i32 q2, #0xff
2548 ; CHECK-NEXT: movs r3, #0
2549 ; CHECK-NEXT: vand q1, q1, q2
2550 ; CHECK-NEXT: vmov r2, s4
2551 ; CHECK-NEXT: cmp r2, #0
2552 ; CHECK-NEXT: csetm r2, eq
2553 ; CHECK-NEXT: bfi r3, r2, #0, #8
2554 ; CHECK-NEXT: vmov r2, s6
2555 ; CHECK-NEXT: vmov.i32 q1, #0x0
2556 ; CHECK-NEXT: cmp r2, #0
2557 ; CHECK-NEXT: csetm r2, eq
2558 ; CHECK-NEXT: bfi r3, r2, #8, #8
2559 ; CHECK-NEXT: vmov r2, s2
2560 ; CHECK-NEXT: vmsr p0, r3
2561 ; CHECK-NEXT: vmov r3, s0
2562 ; CHECK-NEXT: sxtb r2, r2
2563 ; CHECK-NEXT: sxtb r3, r3
2564 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
2565 ; CHECK-NEXT: asrs r2, r2, #31
2566 ; CHECK-NEXT: asrs r3, r3, #31
2567 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
2568 ; CHECK-NEXT: vpsel q0, q0, q1
2569 ; CHECK-NEXT: vmov lr, r12, d1
2570 ; CHECK-NEXT: vmov r3, r2, d0
2571 ; CHECK-NEXT: adds.w r3, r3, lr
2572 ; CHECK-NEXT: adc.w r2, r2, r12
2573 ; CHECK-NEXT: adds r0, r0, r3
2574 ; CHECK-NEXT: adcs r1, r2
2575 ; CHECK-NEXT: pop {r7, pc}
2577 %c = icmp eq <2 x i8> %b, zeroinitializer
2578 %xx = sext <2 x i8> %x to <2 x i64>
2579 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2580 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Predicated add reduction over <2 x i64> with no widening.
; Lanes of %x whose matching %b lane equals zero are summed; all other lanes
; contribute 0.
; The expected assembly tests each 64-bit lane of %b for zero by OR-ing its
; two halves (orrs), builds the predicate with csetm/bfi, moves it to p0 with
; vmsr, selects with vpsel, and sums the two lanes with adds/adc before the
; final adds/adcs into r0/r1.
; NOTE(review): the IR that consumes %z and %a is not visible in this excerpt;
; presumably %z is added to the accumulator %a and returned - confirm.
2585 define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %b, i64 %a) {
2586 ; CHECK-LABEL: add_v2i64_v2i64_acc:
2587 ; CHECK: @ %bb.0: @ %entry
2588 ; CHECK-NEXT: .save {r7, lr}
2589 ; CHECK-NEXT: push {r7, lr}
2590 ; CHECK-NEXT: vmov r2, r3, d2
2591 ; CHECK-NEXT: mov.w r12, #0
2592 ; CHECK-NEXT: orrs r2, r3
2593 ; CHECK-NEXT: csetm r2, eq
2594 ; CHECK-NEXT: bfi r12, r2, #0, #8
2595 ; CHECK-NEXT: vmov r2, r3, d3
2596 ; CHECK-NEXT: vmov.i32 q1, #0x0
2597 ; CHECK-NEXT: orrs r2, r3
2598 ; CHECK-NEXT: csetm r2, eq
2599 ; CHECK-NEXT: bfi r12, r2, #8, #8
2600 ; CHECK-NEXT: vmsr p0, r12
2601 ; CHECK-NEXT: vpsel q0, q0, q1
2602 ; CHECK-NEXT: vmov lr, r12, d1
2603 ; CHECK-NEXT: vmov r3, r2, d0
2604 ; CHECK-NEXT: adds.w r3, r3, lr
2605 ; CHECK-NEXT: adc.w r2, r2, r12
2606 ; CHECK-NEXT: adds r0, r0, r3
2607 ; CHECK-NEXT: adcs r1, r2
2608 ; CHECK-NEXT: pop {r7, pc}
2610 %c = icmp eq <2 x i64> %b, zeroinitializer
2611 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
2612 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
; Declarations of the llvm.vector.reduce.add intrinsic overloads, one per
; vector type, each taking a single vector operand and returning a scalar of
; the element type.
2617 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
2618 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
2619 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
2620 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
2621 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
2622 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
2623 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
2624 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
2625 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
2626 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)