1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
4 define <8 x i8> @shadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
5 ; CHECK-LABEL: shadd8b:
7 ; CHECK-NEXT: ldr d0, [x0]
8 ; CHECK-NEXT: ldr d1, [x1]
9 ; CHECK-NEXT: shadd.8b v0, v0, v1
11 %tmp1 = load <8 x i8>, ptr %A, align 8
12 %tmp2 = load <8 x i8>, ptr %B, align 8
13 %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
17 define <16 x i8> @shadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
18 ; CHECK-LABEL: shadd16b:
20 ; CHECK-NEXT: ldr q0, [x0]
21 ; CHECK-NEXT: ldr q1, [x1]
22 ; CHECK-NEXT: shadd.16b v0, v0, v1
24 %tmp1 = load <16 x i8>, ptr %A, align 16
25 %tmp2 = load <16 x i8>, ptr %B, align 16
26 %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
30 define <4 x i16> @shadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
31 ; CHECK-LABEL: shadd4h:
33 ; CHECK-NEXT: ldr d0, [x0]
34 ; CHECK-NEXT: ldr d1, [x1]
35 ; CHECK-NEXT: shadd.4h v0, v0, v1
37 %tmp1 = load <4 x i16>, ptr %A, align 8
38 %tmp2 = load <4 x i16>, ptr %B, align 8
39 %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
43 define <8 x i16> @shadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
44 ; CHECK-LABEL: shadd8h:
46 ; CHECK-NEXT: ldr q0, [x0]
47 ; CHECK-NEXT: ldr q1, [x1]
48 ; CHECK-NEXT: shadd.8h v0, v0, v1
50 %tmp1 = load <8 x i16>, ptr %A, align 16
51 %tmp2 = load <8 x i16>, ptr %B, align 16
52 %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
56 define <2 x i32> @shadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
57 ; CHECK-LABEL: shadd2s:
59 ; CHECK-NEXT: ldr d0, [x0]
60 ; CHECK-NEXT: ldr d1, [x1]
61 ; CHECK-NEXT: shadd.2s v0, v0, v1
63 %tmp1 = load <2 x i32>, ptr %A, align 8
64 %tmp2 = load <2 x i32>, ptr %B, align 8
65 %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
69 define <4 x i32> @shadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
70 ; CHECK-LABEL: shadd4s:
72 ; CHECK-NEXT: ldr q0, [x0]
73 ; CHECK-NEXT: ldr q1, [x1]
74 ; CHECK-NEXT: shadd.4s v0, v0, v1
76 %tmp1 = load <4 x i32>, ptr %A, align 16
77 %tmp2 = load <4 x i32>, ptr %B, align 16
78 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
82 define <8 x i8> @uhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
83 ; CHECK-LABEL: uhadd8b:
85 ; CHECK-NEXT: ldr d0, [x0]
86 ; CHECK-NEXT: ldr d1, [x1]
87 ; CHECK-NEXT: uhadd.8b v0, v0, v1
89 %tmp1 = load <8 x i8>, ptr %A, align 8
90 %tmp2 = load <8 x i8>, ptr %B, align 8
91 %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
95 define <16 x i8> @uhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
96 ; CHECK-LABEL: uhadd16b:
98 ; CHECK-NEXT: ldr q0, [x0]
99 ; CHECK-NEXT: ldr q1, [x1]
100 ; CHECK-NEXT: uhadd.16b v0, v0, v1
102 %tmp1 = load <16 x i8>, ptr %A, align 16
103 %tmp2 = load <16 x i8>, ptr %B, align 16
104 %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
108 define <4 x i16> @uhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
109 ; CHECK-LABEL: uhadd4h:
111 ; CHECK-NEXT: ldr d0, [x0]
112 ; CHECK-NEXT: ldr d1, [x1]
113 ; CHECK-NEXT: uhadd.4h v0, v0, v1
115 %tmp1 = load <4 x i16>, ptr %A, align 8
116 %tmp2 = load <4 x i16>, ptr %B, align 8
117 %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
121 define <8 x i16> @uhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
122 ; CHECK-LABEL: uhadd8h:
124 ; CHECK-NEXT: ldr q0, [x0]
125 ; CHECK-NEXT: ldr q1, [x1]
126 ; CHECK-NEXT: uhadd.8h v0, v0, v1
128 %tmp1 = load <8 x i16>, ptr %A, align 16
129 %tmp2 = load <8 x i16>, ptr %B, align 16
130 %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
134 define <2 x i32> @uhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
135 ; CHECK-LABEL: uhadd2s:
137 ; CHECK-NEXT: ldr d0, [x0]
138 ; CHECK-NEXT: ldr d1, [x1]
139 ; CHECK-NEXT: uhadd.2s v0, v0, v1
141 %tmp1 = load <2 x i32>, ptr %A, align 8
142 %tmp2 = load <2 x i32>, ptr %B, align 8
143 %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
147 define <4 x i32> @uhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
148 ; CHECK-LABEL: uhadd4s:
150 ; CHECK-NEXT: ldr q0, [x0]
151 ; CHECK-NEXT: ldr q1, [x1]
152 ; CHECK-NEXT: uhadd.4s v0, v0, v1
154 %tmp1 = load <4 x i32>, ptr %A, align 16
155 %tmp2 = load <4 x i32>, ptr %B, align 16
156 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations for the AArch64 NEON signed/unsigned halving-add intrinsics
; exercised by the tests above: shadd = signed halving add, uhadd = unsigned
; halving add, each instantiated for the 64-bit (v8i8/v4i16/v2i32) and
; 128-bit (v16i8/v8i16/v4i32) vector types.
160 declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
161 declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
162 declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
163 declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
164 declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
165 declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
166 declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
167 declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
168 declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
169 declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
170 declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
171 declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)
173 define <8 x i8> @srhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
174 ; CHECK-LABEL: srhadd8b:
176 ; CHECK-NEXT: ldr d0, [x0]
177 ; CHECK-NEXT: ldr d1, [x1]
178 ; CHECK-NEXT: srhadd.8b v0, v0, v1
180 %tmp1 = load <8 x i8>, ptr %A, align 8
181 %tmp2 = load <8 x i8>, ptr %B, align 8
182 %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
186 define <16 x i8> @srhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
187 ; CHECK-LABEL: srhadd16b:
189 ; CHECK-NEXT: ldr q0, [x0]
190 ; CHECK-NEXT: ldr q1, [x1]
191 ; CHECK-NEXT: srhadd.16b v0, v0, v1
193 %tmp1 = load <16 x i8>, ptr %A, align 16
194 %tmp2 = load <16 x i8>, ptr %B, align 16
195 %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
199 define <4 x i16> @srhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
200 ; CHECK-LABEL: srhadd4h:
202 ; CHECK-NEXT: ldr d0, [x0]
203 ; CHECK-NEXT: ldr d1, [x1]
204 ; CHECK-NEXT: srhadd.4h v0, v0, v1
206 %tmp1 = load <4 x i16>, ptr %A, align 8
207 %tmp2 = load <4 x i16>, ptr %B, align 8
208 %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
212 define <8 x i16> @srhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
213 ; CHECK-LABEL: srhadd8h:
215 ; CHECK-NEXT: ldr q0, [x0]
216 ; CHECK-NEXT: ldr q1, [x1]
217 ; CHECK-NEXT: srhadd.8h v0, v0, v1
219 %tmp1 = load <8 x i16>, ptr %A, align 16
220 %tmp2 = load <8 x i16>, ptr %B, align 16
221 %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
225 define <2 x i32> @srhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
226 ; CHECK-LABEL: srhadd2s:
228 ; CHECK-NEXT: ldr d0, [x0]
229 ; CHECK-NEXT: ldr d1, [x1]
230 ; CHECK-NEXT: srhadd.2s v0, v0, v1
232 %tmp1 = load <2 x i32>, ptr %A, align 8
233 %tmp2 = load <2 x i32>, ptr %B, align 8
234 %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
238 define <4 x i32> @srhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
239 ; CHECK-LABEL: srhadd4s:
241 ; CHECK-NEXT: ldr q0, [x0]
242 ; CHECK-NEXT: ldr q1, [x1]
243 ; CHECK-NEXT: srhadd.4s v0, v0, v1
245 %tmp1 = load <4 x i32>, ptr %A, align 16
246 %tmp2 = load <4 x i32>, ptr %B, align 16
247 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
251 define <8 x i8> @urhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
252 ; CHECK-LABEL: urhadd8b:
254 ; CHECK-NEXT: ldr d0, [x0]
255 ; CHECK-NEXT: ldr d1, [x1]
256 ; CHECK-NEXT: urhadd.8b v0, v0, v1
258 %tmp1 = load <8 x i8>, ptr %A, align 8
259 %tmp2 = load <8 x i8>, ptr %B, align 8
260 %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
264 define <16 x i8> @urhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
265 ; CHECK-LABEL: urhadd16b:
267 ; CHECK-NEXT: ldr q0, [x0]
268 ; CHECK-NEXT: ldr q1, [x1]
269 ; CHECK-NEXT: urhadd.16b v0, v0, v1
271 %tmp1 = load <16 x i8>, ptr %A, align 16
272 %tmp2 = load <16 x i8>, ptr %B, align 16
273 %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
277 define <4 x i16> @urhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
278 ; CHECK-LABEL: urhadd4h:
280 ; CHECK-NEXT: ldr d0, [x0]
281 ; CHECK-NEXT: ldr d1, [x1]
282 ; CHECK-NEXT: urhadd.4h v0, v0, v1
284 %tmp1 = load <4 x i16>, ptr %A, align 8
285 %tmp2 = load <4 x i16>, ptr %B, align 8
286 %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
290 define <8 x i16> @urhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
291 ; CHECK-LABEL: urhadd8h:
293 ; CHECK-NEXT: ldr q0, [x0]
294 ; CHECK-NEXT: ldr q1, [x1]
295 ; CHECK-NEXT: urhadd.8h v0, v0, v1
297 %tmp1 = load <8 x i16>, ptr %A, align 16
298 %tmp2 = load <8 x i16>, ptr %B, align 16
299 %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
303 define <2 x i32> @urhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
304 ; CHECK-LABEL: urhadd2s:
306 ; CHECK-NEXT: ldr d0, [x0]
307 ; CHECK-NEXT: ldr d1, [x1]
308 ; CHECK-NEXT: urhadd.2s v0, v0, v1
310 %tmp1 = load <2 x i32>, ptr %A, align 8
311 %tmp2 = load <2 x i32>, ptr %B, align 8
312 %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
316 define <4 x i32> @urhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
317 ; CHECK-LABEL: urhadd4s:
319 ; CHECK-NEXT: ldr q0, [x0]
320 ; CHECK-NEXT: ldr q1, [x1]
321 ; CHECK-NEXT: urhadd.4s v0, v0, v1
323 %tmp1 = load <4 x i32>, ptr %A, align 16
324 %tmp2 = load <4 x i32>, ptr %B, align 16
325 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
329 define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
330 ; CHECK-LABEL: testLowerToSRHADD8b:
332 ; CHECK-NEXT: srhadd.8b v0, v0, v1
333 ; CHECK-NEXT: str d0, [x0]
335 %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
336 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
337 %add1 = add nsw <8 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
338 %add2 = add nsw <8 x i16> %add1, %sextsrc2
339 %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
340 %result = trunc <8 x i16> %resulti16 to <8 x i8>
341 store <8 x i8> %result, ptr %dest, align 8
345 define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
346 ; CHECK-LABEL: testLowerToSRHADD4h:
348 ; CHECK-NEXT: srhadd.4h v0, v0, v1
349 ; CHECK-NEXT: str d0, [x0]
351 %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
352 %sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
353 %add1 = add nsw <4 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1>
354 %add2 = add nsw <4 x i32> %add1, %sextsrc2
355 %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
356 %result = trunc <4 x i32> %resulti16 to <4 x i16>
357 store <4 x i16> %result, ptr %dest, align 8
361 define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
362 ; CHECK-LABEL: testLowerToSRHADD2s:
364 ; CHECK-NEXT: srhadd.2s v0, v0, v1
365 ; CHECK-NEXT: str d0, [x0]
367 %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
368 %sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
369 %add1 = add nsw <2 x i64> %sextsrc1, <i64 1, i64 1>
370 %add2 = add nsw <2 x i64> %add1, %sextsrc2
371 %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
372 %result = trunc <2 x i64> %resulti16 to <2 x i32>
373 store <2 x i32> %result, ptr %dest, align 8
377 define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
378 ; CHECK-LABEL: testLowerToSRHADD16b:
380 ; CHECK-NEXT: srhadd.16b v0, v0, v1
381 ; CHECK-NEXT: str q0, [x0]
383 %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
384 %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
385 %add1 = add nsw <16 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
386 %add2 = add nsw <16 x i16> %add1, %sextsrc2
387 %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
388 %result = trunc <16 x i16> %resulti16 to <16 x i8>
389 store <16 x i8> %result, ptr %dest, align 16
393 define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
394 ; CHECK-LABEL: testLowerToSRHADD8h:
396 ; CHECK-NEXT: srhadd.8h v0, v0, v1
397 ; CHECK-NEXT: str q0, [x0]
399 %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
400 %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
401 %add1 = add nsw <8 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
402 %add2 = add nsw <8 x i32> %add1, %sextsrc2
403 %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
404 %result = trunc <8 x i32> %resulti16 to <8 x i16>
405 store <8 x i16> %result, ptr %dest, align 16
409 define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
410 ; CHECK-LABEL: testLowerToSRHADD4s:
412 ; CHECK-NEXT: srhadd.4s v0, v0, v1
413 ; CHECK-NEXT: str q0, [x0]
415 %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
416 %sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
417 %add1 = add nsw <4 x i64> %sextsrc1, <i64 1, i64 1, i64 1, i64 1>
418 %add2 = add nsw <4 x i64> %add1, %sextsrc2
419 %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
420 %result = trunc <4 x i64> %resulti16 to <4 x i32>
421 store <4 x i32> %result, ptr %dest, align 16
425 define void @testLowerToSHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
426 ; CHECK-LABEL: testLowerToSHADD8b:
428 ; CHECK-NEXT: shadd.8b v0, v0, v1
429 ; CHECK-NEXT: str d0, [x0]
431 %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
432 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
433 %add = add nsw <8 x i16> %sextsrc1, %sextsrc2
434 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
435 %result = trunc <8 x i16> %resulti16 to <8 x i8>
436 store <8 x i8> %result, ptr %dest, align 8
440 define void @testLowerToSHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
441 ; CHECK-LABEL: testLowerToSHADD4h:
443 ; CHECK-NEXT: shadd.4h v0, v0, v1
444 ; CHECK-NEXT: str d0, [x0]
446 %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
447 %sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
448 %add = add nsw <4 x i32> %sextsrc1, %sextsrc2
449 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
450 %result = trunc <4 x i32> %resulti16 to <4 x i16>
451 store <4 x i16> %result, ptr %dest, align 8
455 define void @testLowerToSHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
456 ; CHECK-LABEL: testLowerToSHADD2s:
458 ; CHECK-NEXT: shadd.2s v0, v0, v1
459 ; CHECK-NEXT: str d0, [x0]
461 %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
462 %sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
463 %add = add nsw <2 x i64> %sextsrc1, %sextsrc2
464 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
465 %result = trunc <2 x i64> %resulti16 to <2 x i32>
466 store <2 x i32> %result, ptr %dest, align 8
470 define void @testLowerToSHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
471 ; CHECK-LABEL: testLowerToSHADD16b:
473 ; CHECK-NEXT: shadd.16b v0, v0, v1
474 ; CHECK-NEXT: str q0, [x0]
476 %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
477 %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
478 %add = add nsw <16 x i16> %sextsrc1, %sextsrc2
479 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
480 %result = trunc <16 x i16> %resulti16 to <16 x i8>
481 store <16 x i8> %result, ptr %dest, align 16
485 define void @testLowerToSHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
486 ; CHECK-LABEL: testLowerToSHADD8h:
488 ; CHECK-NEXT: shadd.8h v0, v0, v1
489 ; CHECK-NEXT: str q0, [x0]
491 %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
492 %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
493 %add = add nsw <8 x i32> %sextsrc1, %sextsrc2
494 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
495 %result = trunc <8 x i32> %resulti16 to <8 x i16>
496 store <8 x i16> %result, ptr %dest, align 16
500 define void @testLowerToSHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
501 ; CHECK-LABEL: testLowerToSHADD4s:
503 ; CHECK-NEXT: shadd.4s v0, v0, v1
504 ; CHECK-NEXT: str q0, [x0]
506 %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
507 %sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
508 %add = add nsw <4 x i64> %sextsrc1, %sextsrc2
509 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
510 %result = trunc <4 x i64> %resulti16 to <4 x i32>
511 store <4 x i32> %result, ptr %dest, align 16
515 define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
516 ; CHECK-LABEL: testLowerToURHADD8b:
518 ; CHECK-NEXT: urhadd.8b v0, v0, v1
519 ; CHECK-NEXT: str d0, [x0]
521 %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
522 %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
523 %add1 = add nuw nsw <8 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
524 %add2 = add nuw nsw <8 x i16> %add1, %zextsrc2
525 %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
526 %result = trunc <8 x i16> %resulti16 to <8 x i8>
527 store <8 x i8> %result, ptr %dest, align 8
531 define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
532 ; CHECK-LABEL: testLowerToURHADD4h:
534 ; CHECK-NEXT: urhadd.4h v0, v0, v1
535 ; CHECK-NEXT: str d0, [x0]
537 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
538 %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
539 %add1 = add nuw nsw <4 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1>
540 %add2 = add nuw nsw <4 x i32> %add1, %zextsrc2
541 %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
542 %result = trunc <4 x i32> %resulti16 to <4 x i16>
543 store <4 x i16> %result, ptr %dest, align 8
547 define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
548 ; CHECK-LABEL: testLowerToURHADD2s:
550 ; CHECK-NEXT: urhadd.2s v0, v0, v1
551 ; CHECK-NEXT: str d0, [x0]
553 %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
554 %zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
555 %add1 = add nuw nsw <2 x i64> %zextsrc1, <i64 1, i64 1>
556 %add2 = add nuw nsw <2 x i64> %add1, %zextsrc2
557 %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
558 %result = trunc <2 x i64> %resulti16 to <2 x i32>
559 store <2 x i32> %result, ptr %dest, align 8
563 define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
564 ; CHECK-LABEL: testLowerToURHADD16b:
566 ; CHECK-NEXT: urhadd.16b v0, v0, v1
567 ; CHECK-NEXT: str q0, [x0]
569 %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
570 %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
571 %add1 = add nuw nsw <16 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
572 %add2 = add nuw nsw <16 x i16> %add1, %zextsrc2
573 %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
574 %result = trunc <16 x i16> %resulti16 to <16 x i8>
575 store <16 x i8> %result, ptr %dest, align 16
579 define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
580 ; CHECK-LABEL: testLowerToURHADD8h:
582 ; CHECK-NEXT: urhadd.8h v0, v0, v1
583 ; CHECK-NEXT: str q0, [x0]
585 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
586 %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
587 %add1 = add nuw nsw <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
588 %add2 = add nuw nsw <8 x i32> %add1, %zextsrc2
589 %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
590 %result = trunc <8 x i32> %resulti16 to <8 x i16>
591 store <8 x i16> %result, ptr %dest, align 16
595 define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
596 ; CHECK-LABEL: testLowerToURHADD4s:
598 ; CHECK-NEXT: urhadd.4s v0, v0, v1
599 ; CHECK-NEXT: str q0, [x0]
601 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
602 %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
603 %add1 = add nuw nsw <4 x i64> %zextsrc1, <i64 1, i64 1, i64 1, i64 1>
604 %add2 = add nuw nsw <4 x i64> %add1, %zextsrc2
605 %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
606 %result = trunc <4 x i64> %resulti16 to <4 x i32>
607 store <4 x i32> %result, ptr %dest, align 16
611 define void @testLowerToUHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
612 ; CHECK-LABEL: testLowerToUHADD8b:
614 ; CHECK-NEXT: uhadd.8b v0, v0, v1
615 ; CHECK-NEXT: str d0, [x0]
617 %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
618 %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
619 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
620 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
621 %result = trunc <8 x i16> %resulti16 to <8 x i8>
622 store <8 x i8> %result, ptr %dest, align 8
626 define void @testLowerToUHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
627 ; CHECK-LABEL: testLowerToUHADD4h:
629 ; CHECK-NEXT: uhadd.4h v0, v0, v1
630 ; CHECK-NEXT: str d0, [x0]
632 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
633 %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
634 %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
635 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
636 %result = trunc <4 x i32> %resulti16 to <4 x i16>
637 store <4 x i16> %result, ptr %dest, align 8
641 define void @testLowerToUHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
642 ; CHECK-LABEL: testLowerToUHADD2s:
644 ; CHECK-NEXT: uhadd.2s v0, v0, v1
645 ; CHECK-NEXT: str d0, [x0]
647 %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
648 %zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
649 %add = add nuw nsw <2 x i64> %zextsrc1, %zextsrc2
650 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
651 %result = trunc <2 x i64> %resulti16 to <2 x i32>
652 store <2 x i32> %result, ptr %dest, align 8
656 define void @testLowerToUHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
657 ; CHECK-LABEL: testLowerToUHADD16b:
659 ; CHECK-NEXT: uhadd.16b v0, v0, v1
660 ; CHECK-NEXT: str q0, [x0]
662 %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
663 %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
664 %add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
665 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
666 %result = trunc <16 x i16> %resulti16 to <16 x i8>
667 store <16 x i8> %result, ptr %dest, align 16
671 define void @testLowerToUHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
672 ; CHECK-LABEL: testLowerToUHADD8h:
674 ; CHECK-NEXT: uhadd.8h v0, v0, v1
675 ; CHECK-NEXT: str q0, [x0]
677 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
678 %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
679 %add = add nuw nsw <8 x i32> %zextsrc1, %zextsrc2
680 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
681 %result = trunc <8 x i32> %resulti16 to <8 x i16>
682 store <8 x i16> %result, ptr %dest, align 16
686 define void @testLowerToUHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
687 ; CHECK-LABEL: testLowerToUHADD4s:
689 ; CHECK-NEXT: uhadd.4s v0, v0, v1
690 ; CHECK-NEXT: str q0, [x0]
692 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
693 %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
694 %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
695 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
696 %result = trunc <4 x i64> %resulti16 to <4 x i32>
697 store <4 x i32> %result, ptr %dest, align 16
701 define <4 x i32> @hadd16_sext_asr(<4 x i16> %src1, <4 x i16> %src2) {
702 ; CHECK-LABEL: hadd16_sext_asr:
704 ; CHECK-NEXT: shadd.4h v0, v0, v1
705 ; CHECK-NEXT: sshll.4s v0, v0, #0
707 %zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
708 %zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
709 %add = add nsw <4 x i32> %zextsrc1, %zextsrc2
710 %resulti16 = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
711 ret <4 x i32> %resulti16
714 define <4 x i32> @hadd16_zext_asr(<4 x i16> %src1, <4 x i16> %src2) {
715 ; CHECK-LABEL: hadd16_zext_asr:
717 ; CHECK-NEXT: uhadd.4h v0, v0, v1
718 ; CHECK-NEXT: ushll.4s v0, v0, #0
720 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
721 %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
722 %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
723 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
724 ret <4 x i32> %resulti16
727 define <4 x i32> @hadd16_sext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
728 ; CHECK-LABEL: hadd16_sext_lsr:
730 ; CHECK-NEXT: saddl.4s v0, v0, v1
731 ; CHECK-NEXT: ushr.4s v0, v0, #1
733 %zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
734 %zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
735 %add = add nsw <4 x i32> %zextsrc1, %zextsrc2
736 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
737 ret <4 x i32> %resulti16
740 define <4 x i32> @hadd16_zext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
741 ; CHECK-LABEL: hadd16_zext_lsr:
743 ; CHECK-NEXT: uhadd.4h v0, v0, v1
744 ; CHECK-NEXT: ushll.4s v0, v0, #0
746 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
747 %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
748 %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
749 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
750 ret <4 x i32> %resulti16
753 define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) {
754 ; CHECK-LABEL: hadd32_sext_asr:
756 ; CHECK-NEXT: shadd.4s v0, v0, v1
757 ; CHECK-NEXT: sshll2.2d v1, v0, #0
758 ; CHECK-NEXT: sshll.2d v0, v0, #0
760 %zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
761 %zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
762 %add = add nsw <4 x i64> %zextsrc1, %zextsrc2
763 %resulti32 = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
764 ret <4 x i64> %resulti32
767 define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) {
768 ; CHECK-LABEL: hadd32_zext_asr:
770 ; CHECK-NEXT: uhadd.4s v0, v0, v1
771 ; CHECK-NEXT: ushll2.2d v1, v0, #0
772 ; CHECK-NEXT: ushll.2d v0, v0, #0
774 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
775 %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
776 %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
777 %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
778 ret <4 x i64> %resulti32
781 define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
782 ; CHECK-LABEL: hadd32_sext_lsr:
784 ; CHECK-NEXT: saddl.2d v2, v0, v1
785 ; CHECK-NEXT: saddl2.2d v0, v0, v1
786 ; CHECK-NEXT: ushr.2d v1, v0, #1
787 ; CHECK-NEXT: ushr.2d v0, v2, #1
789 %zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
790 %zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
791 %add = add nsw <4 x i64> %zextsrc1, %zextsrc2
792 %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
793 ret <4 x i64> %resulti32
796 define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
797 ; CHECK-LABEL: hadd32_zext_lsr:
799 ; CHECK-NEXT: uhadd.4s v0, v0, v1
800 ; CHECK-NEXT: ushll2.2d v1, v0, #0
801 ; CHECK-NEXT: ushll.2d v0, v0, #0
803 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
804 %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
805 %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
806 %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
807 ret <4 x i64> %resulti32
810 define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
811 ; CHECK-LABEL: hadd8_sext_asr:
813 ; CHECK-NEXT: shl.4h v0, v0, #8
814 ; CHECK-NEXT: shl.4h v1, v1, #8
815 ; CHECK-NEXT: sshr.4h v0, v0, #8
816 ; CHECK-NEXT: sshr.4h v1, v1, #8
817 ; CHECK-NEXT: shadd.4h v0, v0, v1
819 %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
820 %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
821 %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
822 %resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
823 ret <4 x i16> %resulti8
826 define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
827 ; CHECK-LABEL: hadd8_zext_asr:
829 ; CHECK-NEXT: bic.4h v0, #255, lsl #8
830 ; CHECK-NEXT: bic.4h v1, #255, lsl #8
831 ; CHECK-NEXT: uhadd.4h v0, v0, v1
833 %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
834 %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
835 %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
836 %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
837 ret <4 x i16> %resulti8
840 define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
841 ; CHECK-LABEL: hadd8_sext_lsr:
843 ; CHECK-NEXT: shl.4h v0, v0, #8
844 ; CHECK-NEXT: shl.4h v1, v1, #8
845 ; CHECK-NEXT: sshr.4h v0, v0, #8
846 ; CHECK-NEXT: ssra.4h v0, v1, #8
847 ; CHECK-NEXT: ushr.4h v0, v0, #1
849 %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
850 %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
851 %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
852 %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
853 ret <4 x i16> %resulti8
856 define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
857 ; CHECK-LABEL: hadd8_zext_lsr:
859 ; CHECK-NEXT: bic.4h v0, #255, lsl #8
860 ; CHECK-NEXT: bic.4h v1, #255, lsl #8
861 ; CHECK-NEXT: uhadd.4h v0, v0, v1
863 %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
864 %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
865 %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
866 %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
867 ret <4 x i16> %resulti8
870 define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
871 ; CHECK-LABEL: hadd8x2_sext_asr:
873 ; CHECK-NEXT: shl.2s v0, v0, #24
874 ; CHECK-NEXT: shl.2s v1, v1, #24
875 ; CHECK-NEXT: sshr.2s v0, v0, #24
876 ; CHECK-NEXT: sshr.2s v1, v1, #24
877 ; CHECK-NEXT: shadd.2s v0, v0, v1
879 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
880 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
881 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
882 %resulti8 = ashr <2 x i16> %add, <i16 1, i16 1>
883 ret <2 x i16> %resulti8
886 define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
887 ; CHECK-LABEL: hadd8x2_zext_asr:
889 ; CHECK-NEXT: movi d2, #0x0000ff000000ff
890 ; CHECK-NEXT: and.8b v0, v0, v2
891 ; CHECK-NEXT: and.8b v1, v1, v2
892 ; CHECK-NEXT: uhadd.2s v0, v0, v1
894 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
895 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
896 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
897 %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
898 ret <2 x i16> %resulti8
; Negative case: lshr (logical) of a sign-extended sum is not a signed halving
; add, so no shadd is formed; the expanded shift/mask sequence is kept.
901 define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
902 ; CHECK-LABEL: hadd8x2_sext_lsr:
904 ; CHECK-NEXT: shl.2s v0, v0, #24
905 ; CHECK-NEXT: shl.2s v1, v1, #24
906 ; CHECK-NEXT: sshr.2s v0, v0, #24
907 ; CHECK-NEXT: ssra.2s v0, v1, #24
908 ; CHECK-NEXT: movi d1, #0x00ffff0000ffff
909 ; CHECK-NEXT: and.8b v0, v0, v1
910 ; CHECK-NEXT: ushr.2s v0, v0, #1
912 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
913 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
914 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
915 %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
916 ret <2 x i16> %resulti8
; zext <2 x i8> + add + lshr 1: masked with movi/and and lowered to uhadd.2s.
919 define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
920 ; CHECK-LABEL: hadd8x2_zext_lsr:
922 ; CHECK-NEXT: movi d2, #0x0000ff000000ff
923 ; CHECK-NEXT: and.8b v0, v0, v2
924 ; CHECK-NEXT: and.8b v1, v1, v2
925 ; CHECK-NEXT: uhadd.2s v0, v0, v1
927 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
928 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
929 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
930 %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
931 ret <2 x i16> %resulti8
; sext + add + add 1 + ashr 1 (rounding average): lowered to srhadd.4h after
; the shl/sshr sign-extension of the <4 x i8> inputs.
934 define <4 x i16> @rhadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
935 ; CHECK-LABEL: rhadd8_sext_asr:
937 ; CHECK-NEXT: shl.4h v0, v0, #8
938 ; CHECK-NEXT: shl.4h v1, v1, #8
939 ; CHECK-NEXT: sshr.4h v0, v0, #8
940 ; CHECK-NEXT: sshr.4h v1, v1, #8
941 ; CHECK-NEXT: srhadd.4h v0, v0, v1
943 %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
944 %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
945 %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
946 %add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
947 %resulti8 = ashr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
948 ret <4 x i16> %resulti8
; zext + add + add 1 + lshr 1 (rounding average): lowered to urhadd.4h with the
; zext materialized as a BIC mask.
951 define <4 x i16> @rhadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
952 ; CHECK-LABEL: rhadd8_zext_asr:
954 ; CHECK-NEXT: bic.4h v0, #255, lsl #8
955 ; CHECK-NEXT: bic.4h v1, #255, lsl #8
956 ; CHECK-NEXT: urhadd.4h v0, v0, v1
958 %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
959 %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
960 %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
961 %add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
962 %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
963 ret <4 x i16> %resulti8
; Negative case: lshr of a sign-extended rounding sum is not srhadd; the
; explicit add #1 and ushr sequence is kept.
966 define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
967 ; CHECK-LABEL: rhadd8_sext_lsr:
969 ; CHECK-NEXT: shl.4h v0, v0, #8
970 ; CHECK-NEXT: shl.4h v1, v1, #8
971 ; CHECK-NEXT: sshr.4h v0, v0, #8
972 ; CHECK-NEXT: ssra.4h v0, v1, #8
973 ; CHECK-NEXT: movi.4h v1, #1
974 ; CHECK-NEXT: add.4h v0, v0, v1
975 ; CHECK-NEXT: ushr.4h v0, v0, #1
977 %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
978 %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
979 %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
980 %add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
981 %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
982 ret <4 x i16> %resulti8
; zext + add + add 1 + lshr 1: lowered to urhadd.4h.
985 define <4 x i16> @rhadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
986 ; CHECK-LABEL: rhadd8_zext_lsr:
988 ; CHECK-NEXT: bic.4h v0, #255, lsl #8
989 ; CHECK-NEXT: bic.4h v1, #255, lsl #8
990 ; CHECK-NEXT: urhadd.4h v0, v0, v1
992 %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
993 %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
994 %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
995 %add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
996 %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
997 ret <4 x i16> %resulti8
; <2 x i8> variant of the signed rounding average: lowered to srhadd.2s.
1000 define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
1001 ; CHECK-LABEL: rhadd8x2_sext_asr:
1003 ; CHECK-NEXT: shl.2s v0, v0, #24
1004 ; CHECK-NEXT: shl.2s v1, v1, #24
1005 ; CHECK-NEXT: sshr.2s v0, v0, #24
1006 ; CHECK-NEXT: sshr.2s v1, v1, #24
1007 ; CHECK-NEXT: srhadd.2s v0, v0, v1
1009 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
1010 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
1011 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
1012 %add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
1013 %resulti8 = ashr <2 x i16> %add2, <i16 1, i16 1>
1014 ret <2 x i16> %resulti8
; <2 x i8> variant of the unsigned rounding average: lowered to urhadd.2s.
1017 define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
1018 ; CHECK-LABEL: rhadd8x2_zext_asr:
1020 ; CHECK-NEXT: movi d2, #0x0000ff000000ff
1021 ; CHECK-NEXT: and.8b v0, v0, v2
1022 ; CHECK-NEXT: and.8b v1, v1, v2
1023 ; CHECK-NEXT: urhadd.2s v0, v0, v1
1025 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
1026 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
1027 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
1028 %add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
1029 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
1030 ret <2 x i16> %resulti8
; Negative case: lshr of the sign-extended rounding sum does not form srhadd;
; the expanded mvn/sub/and/ushr sequence is kept.
1033 define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
1034 ; CHECK-LABEL: rhadd8x2_sext_lsr:
1036 ; CHECK-NEXT: shl.2s v0, v0, #24
1037 ; CHECK-NEXT: shl.2s v1, v1, #24
1038 ; CHECK-NEXT: movi d2, #0x00ffff0000ffff
1039 ; CHECK-NEXT: sshr.2s v0, v0, #24
1040 ; CHECK-NEXT: sshr.2s v1, v1, #24
1041 ; CHECK-NEXT: mvn.8b v0, v0
1042 ; CHECK-NEXT: sub.2s v0, v1, v0
1043 ; CHECK-NEXT: and.8b v0, v0, v2
1044 ; CHECK-NEXT: ushr.2s v0, v0, #1
1046 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
1047 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
1048 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
1049 %add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
1050 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
1051 ret <2 x i16> %resulti8
; zext + add + add 1 + lshr 1 on <2 x i8>: lowered to urhadd.2s.
1054 define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
1055 ; CHECK-LABEL: rhadd8x2_zext_lsr:
1057 ; CHECK-NEXT: movi d2, #0x0000ff000000ff
1058 ; CHECK-NEXT: and.8b v0, v0, v2
1059 ; CHECK-NEXT: and.8b v1, v1, v2
1060 ; CHECK-NEXT: urhadd.2s v0, v0, v1
1062 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
1063 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
1064 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
1065 %add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
1066 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
1067 ret <2 x i16> %resulti8
; Halving add against a constant splat: the splat 10 becomes a movi.8b
; immediate and the widen/add/shift/truncate collapses to shadd.8b.
1071 define void @testLowerToSHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
1072 ; CHECK-LABEL: testLowerToSHADD8b_c:
1074 ; CHECK-NEXT: movi.8b v1, #10
1075 ; CHECK-NEXT: shadd.8b v0, v0, v1
1076 ; CHECK-NEXT: str d0, [x0]
1078 %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
1079 %add = add nsw <8 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1080 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1081 %result = trunc <8 x i16> %resulti16 to <8 x i8>
1082 store <8 x i8> %result, ptr %dest, align 8
; Constant-splat signed halving add on <4 x i16>: movi.4h + shadd.4h.
1086 define void @testLowerToSHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
1087 ; CHECK-LABEL: testLowerToSHADD4h_c:
1089 ; CHECK-NEXT: movi.4h v1, #10
1090 ; CHECK-NEXT: shadd.4h v0, v0, v1
1091 ; CHECK-NEXT: str d0, [x0]
1093 %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
1094 %add = add nsw <4 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10>
1095 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
1096 %result = trunc <4 x i32> %resulti16 to <4 x i16>
1097 store <4 x i16> %result, ptr %dest, align 8
; Constant-splat signed halving add on <2 x i32>: movi.2s + shadd.2s.
1101 define void @testLowerToSHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
1102 ; CHECK-LABEL: testLowerToSHADD2s_c:
1104 ; CHECK-NEXT: movi.2s v1, #10
1105 ; CHECK-NEXT: shadd.2s v0, v0, v1
1106 ; CHECK-NEXT: str d0, [x0]
1108 %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
1109 %add = add nsw <2 x i64> %sextsrc1, <i64 10, i64 10>
1110 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
1111 %result = trunc <2 x i64> %resulti16 to <2 x i32>
1112 store <2 x i32> %result, ptr %dest, align 8
; 128-bit constant-splat signed halving add: movi.16b + shadd.16b.
1116 define void @testLowerToSHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
1117 ; CHECK-LABEL: testLowerToSHADD16b_c:
1119 ; CHECK-NEXT: movi.16b v1, #10
1120 ; CHECK-NEXT: shadd.16b v0, v0, v1
1121 ; CHECK-NEXT: str q0, [x0]
1123 %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
1124 %add = add nsw <16 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1125 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1126 %result = trunc <16 x i16> %resulti16 to <16 x i8>
1127 store <16 x i8> %result, ptr %dest, align 16
; 128-bit constant-splat signed halving add: movi.8h + shadd.8h.
1131 define void @testLowerToSHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
1132 ; CHECK-LABEL: testLowerToSHADD8h_c:
1134 ; CHECK-NEXT: movi.8h v1, #10
1135 ; CHECK-NEXT: shadd.8h v0, v0, v1
1136 ; CHECK-NEXT: str q0, [x0]
1138 %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
1139 %add = add nsw <8 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
1140 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1141 %result = trunc <8 x i32> %resulti16 to <8 x i16>
1142 store <8 x i16> %result, ptr %dest, align 16
; 128-bit constant-splat signed halving add: movi.4s + shadd.4s.
1146 define void @testLowerToSHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
1147 ; CHECK-LABEL: testLowerToSHADD4s_c:
1149 ; CHECK-NEXT: movi.4s v1, #10
1150 ; CHECK-NEXT: shadd.4s v0, v0, v1
1151 ; CHECK-NEXT: str q0, [x0]
1153 %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
1154 %add = add nsw <4 x i64> %sextsrc1, <i64 10, i64 10, i64 10, i64 10>
1155 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
1156 %result = trunc <4 x i64> %resulti16 to <4 x i32>
1157 store <4 x i32> %result, ptr %dest, align 16
; Unsigned constant-splat halving add: movi.8b + uhadd.8b.
1161 define void @testLowerToUHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
1162 ; CHECK-LABEL: testLowerToUHADD8b_c:
1164 ; CHECK-NEXT: movi.8b v1, #10
1165 ; CHECK-NEXT: uhadd.8b v0, v0, v1
1166 ; CHECK-NEXT: str d0, [x0]
1168 %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
1169 %add = add nuw nsw <8 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1170 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1171 %result = trunc <8 x i16> %resulti16 to <8 x i8>
1172 store <8 x i8> %result, ptr %dest, align 8
; Unsigned constant-splat halving add: movi.4h + uhadd.4h.
1176 define void @testLowerToUHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
1177 ; CHECK-LABEL: testLowerToUHADD4h_c:
1179 ; CHECK-NEXT: movi.4h v1, #10
1180 ; CHECK-NEXT: uhadd.4h v0, v0, v1
1181 ; CHECK-NEXT: str d0, [x0]
1183 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
1184 %add = add nuw nsw <4 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10>
1185 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
1186 %result = trunc <4 x i32> %resulti16 to <4 x i16>
1187 store <4 x i16> %result, ptr %dest, align 8
; Unsigned constant-splat halving add: movi.2s + uhadd.2s.
1191 define void @testLowerToUHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
1192 ; CHECK-LABEL: testLowerToUHADD2s_c:
1194 ; CHECK-NEXT: movi.2s v1, #10
1195 ; CHECK-NEXT: uhadd.2s v0, v0, v1
1196 ; CHECK-NEXT: str d0, [x0]
1198 %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
1199 %add = add nuw nsw <2 x i64> %zextsrc1, <i64 10, i64 10>
1200 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
1201 %result = trunc <2 x i64> %resulti16 to <2 x i32>
1202 store <2 x i32> %result, ptr %dest, align 8
; 128-bit unsigned constant-splat halving add: movi.16b + uhadd.16b.
1206 define void @testLowerToUHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
1207 ; CHECK-LABEL: testLowerToUHADD16b_c:
1209 ; CHECK-NEXT: movi.16b v1, #10
1210 ; CHECK-NEXT: uhadd.16b v0, v0, v1
1211 ; CHECK-NEXT: str q0, [x0]
1213 %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
1214 %add = add nuw nsw <16 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1215 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1216 %result = trunc <16 x i16> %resulti16 to <16 x i8>
1217 store <16 x i8> %result, ptr %dest, align 16
; 128-bit unsigned constant-splat halving add: movi.8h + uhadd.8h.
1221 define void @testLowerToUHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
1222 ; CHECK-LABEL: testLowerToUHADD8h_c:
1224 ; CHECK-NEXT: movi.8h v1, #10
1225 ; CHECK-NEXT: uhadd.8h v0, v0, v1
1226 ; CHECK-NEXT: str q0, [x0]
1228 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
1229 %add = add nuw nsw <8 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
1230 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1231 %result = trunc <8 x i32> %resulti16 to <8 x i16>
1232 store <8 x i16> %result, ptr %dest, align 16
; 128-bit unsigned constant-splat halving add: movi.4s + uhadd.4s.
1236 define void @testLowerToUHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
1237 ; CHECK-LABEL: testLowerToUHADD4s_c:
1239 ; CHECK-NEXT: movi.4s v1, #10
1240 ; CHECK-NEXT: uhadd.4s v0, v0, v1
1241 ; CHECK-NEXT: str q0, [x0]
1243 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
1244 %add = add nuw nsw <4 x i64> %zextsrc1, <i64 10, i64 10, i64 10, i64 10>
1245 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
1246 %result = trunc <4 x i64> %resulti16 to <4 x i32>
1247 store <4 x i32> %result, ptr %dest, align 16
; Operand masked with AND 7 is known to fit in i8, so the widened add+lshr is
; narrowed to uhadd.8b after truncating (xtn) and re-masking.
1251 define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
1252 ; CHECK-LABEL: andmaskv8i8:
1254 ; CHECK-NEXT: movi.8b v2, #7
1255 ; CHECK-NEXT: xtn.8b v0, v0
1256 ; CHECK-NEXT: and.8b v0, v0, v2
1257 ; CHECK-NEXT: uhadd.8b v0, v0, v1
1259 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1260 %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
1261 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
1262 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1263 %result = trunc <8 x i16> %resulti16 to <8 x i8>
1264 ret <8 x i8> %result
; 256-bit AND-masked input: narrowed with uzp1 and lowered to uhadd.16b.
1267 define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
1268 ; CHECK-LABEL: andmaskv16i8:
1270 ; CHECK-NEXT: movi.16b v3, #7
1271 ; CHECK-NEXT: uzp1.16b v0, v0, v1
1272 ; CHECK-NEXT: and.16b v0, v0, v3
1273 ; CHECK-NEXT: uhadd.16b v0, v0, v2
1275 %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1276 %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
1277 %add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
1278 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1279 %result = trunc <16 x i16> %resulti16 to <16 x i8>
1280 ret <16 x i8> %result
; Both operands AND-masked (7 and 3): each is narrowed and re-masked, then the
; halving add is done as uhadd.16b.
1283 define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) {
1284 ; CHECK-LABEL: andmask2v16i8:
1286 ; CHECK-NEXT: uzp1.16b v2, v2, v3
1287 ; CHECK-NEXT: movi.16b v3, #3
1288 ; CHECK-NEXT: uzp1.16b v0, v0, v1
1289 ; CHECK-NEXT: movi.16b v1, #7
1290 ; CHECK-NEXT: and.16b v2, v2, v3
1291 ; CHECK-NEXT: and.16b v0, v0, v1
1292 ; CHECK-NEXT: uhadd.16b v0, v0, v2
1294 %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1295 %zextsrc2 = and <16 x i16> %src2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1296 %add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
1297 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1298 %result = trunc <16 x i16> %resulti16 to <16 x i8>
1299 ret <16 x i8> %result
; AND 7 and AND 255 operands: the 255 mask is subsumed by the xtn narrow, so
; only one and.8b remains before uhadd.8b.
1302 define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) {
1303 ; CHECK-LABEL: andmask2v8i8:
1305 ; CHECK-NEXT: movi.8b v2, #7
1306 ; CHECK-NEXT: xtn.8b v0, v0
1307 ; CHECK-NEXT: xtn.8b v1, v1
1308 ; CHECK-NEXT: and.8b v0, v0, v2
1309 ; CHECK-NEXT: uhadd.8b v0, v0, v1
1311 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1312 %zextsrc2 = and <8 x i16> %src2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
1313 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
1314 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1315 %result = trunc <8 x i16> %resulti16 to <8 x i8>
1316 ret <8 x i8> %result
; AND 511 mask exceeds 8 bits, so no narrowing: the halving add stays at the
; i16 element width as uhadd.8h.
1319 define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) {
1320 ; CHECK-LABEL: andmask3v8i8:
1322 ; CHECK-NEXT: movi.8h v2, #7
1323 ; CHECK-NEXT: bic.8h v1, #254, lsl #8
1324 ; CHECK-NEXT: and.16b v0, v0, v2
1325 ; CHECK-NEXT: uhadd.8h v0, v0, v1
1327 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1328 %zextsrc2 = and <8 x i16> %src2, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
1329 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
1330 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1331 ret <8 x i16> %resulti16
; ashr #11 leaves values that fit in i8 (signed), so the wide signed halving
; add is narrowed to shadd.16b after uzp1.
1334 define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
1335 ; CHECK-LABEL: sextmaskv16i8:
1337 ; CHECK-NEXT: sshr.8h v1, v1, #11
1338 ; CHECK-NEXT: sshr.8h v0, v0, #11
1339 ; CHECK-NEXT: uzp1.16b v0, v0, v1
1340 ; CHECK-NEXT: shadd.16b v0, v0, v2
1342 %sextsrc1 = ashr <16 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
1343 %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
1344 %add = add nsw <16 x i16> %sextsrc1, %sextsrc2
1345 %1 = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1346 %result = trunc <16 x i16> %1 to <16 x i8>
1347 ret <16 x i8> %result
; 64-bit variant of the ashr-based narrowing: sshr + xtn + shadd.8b.
1350 define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
1351 ; CHECK-LABEL: sextmaskv8i8:
1353 ; CHECK-NEXT: sshr.8h v0, v0, #11
1354 ; CHECK-NEXT: xtn.8b v0, v0
1355 ; CHECK-NEXT: shadd.8b v0, v0, v1
1357 %sextsrc1 = ashr <8 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
1358 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
1359 %add = add nsw <8 x i16> %sextsrc1, %sextsrc2
1360 %1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1361 %result = trunc <8 x i16> %1 to <8 x i8>
1362 ret <8 x i8> %result
; ashr #8 folds with the truncate into a single shrn.8b before shadd.8b.
1365 define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) {
1366 ; CHECK-LABEL: sextmask2v8i8:
1368 ; CHECK-NEXT: shrn.8b v0, v0, #8
1369 ; CHECK-NEXT: shadd.8b v0, v0, v1
1371 %sextsrc1 = ashr <8 x i16> %src1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1372 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
1373 %add = add nsw <8 x i16> %sextsrc1, %sextsrc2
1374 %1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1375 %result = trunc <8 x i16> %1 to <8 x i8>
1376 ret <8 x i8> %result
; ashr #7 does not guarantee the operand fits in i8, so the halving add stays
; at the i16 width (shadd.8h) and is truncated afterwards with xtn.
1379 define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
1380 ; CHECK-LABEL: sextmask3v8i8:
1382 ; CHECK-NEXT: sshr.8h v0, v0, #7
1383 ; CHECK-NEXT: sshll.8h v1, v1, #0
1384 ; CHECK-NEXT: shadd.8h v0, v0, v1
1385 ; CHECK-NEXT: xtn.8b v0, v0
1387 %1 = ashr <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1388 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
1389 %add = add nsw <8 x i16> %1, %sextsrc2
1390 %2 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1391 %result = trunc <8 x i16> %2 to <8 x i8>
1392 ret <8 x i8> %result
; Declarations of the signed/unsigned rounding-halving-add NEON intrinsics
; referenced by the tests in this file.
1395 declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
1396 declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
1397 declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
1398 declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
1399 declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
1400 declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
1401 declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
1402 declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
1403 declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
1404 declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
1405 declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
1406 declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)