1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
; Unsigned halving add written as plain IR: zext both i16 vectors to i32, add,
; lshr by 1, trunc back to i16. Expected to select a single uhadd.
4 define <8 x i16> @haddu_base(<8 x i16> %src1, <8 x i16> %src2) {
5 ; CHECK-LABEL: haddu_base:
7 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
9 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
10 %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
11 %add = add <8 x i32> %zextsrc1, %zextsrc2
12 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
13 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Unsigned halving-add pattern with a splat-1 constant as the right-hand add
; operand. Expected to materialise the constant (movi #1) and use uhadd.
17 define <8 x i16> @haddu_const(<8 x i16> %src1) {
18 ; CHECK-LABEL: haddu_const:
20 ; CHECK-NEXT: movi v1.8h, #1
21 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
23 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
24 %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
25 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
26 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Same as the splat-1 unsigned pattern, but with the constant written as the
; left-hand add operand. The expected output is the same movi + uhadd pair.
30 define <8 x i16> @haddu_const_lhs(<8 x i16> %src1) {
31 ; CHECK-LABEL: haddu_const_lhs:
33 ; CHECK-NEXT: movi v1.8h, #1
34 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
36 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
37 %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
38 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
39 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Unsigned halving-add pattern where the constant operand is all zeros.
; The expected output contains no uhadd: it widens with ushll/ushll2 and
; narrows with shrn/shrn2 #1.
43 define <8 x i16> @haddu_const_zero(<8 x i16> %src1) {
44 ; CHECK-LABEL: haddu_const_zero:
46 ; CHECK-NEXT: ushll v1.4s, v0.4h, #0
47 ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
48 ; CHECK-NEXT: shrn v0.4h, v1.4s, #1
49 ; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
51 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
52 %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
53 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
54 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Both add operands are constant splats (3 and 1), so the whole expression is
; constant: (3 + 1) >> 1 = 2. Expected to fold to a single movi #2.
58 define <8 x i16> @haddu_const_both() {
59 ; CHECK-LABEL: haddu_const_both:
61 ; CHECK-NEXT: movi v0.8h, #2
63 %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
64 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
65 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Unsigned halving add of two large constant splats (65534 and 65535), built
; from zext/add/shift/trunc. The shift is lshr, matching every other unsigned
; haddu_* pattern in this file. The zero-extended sum is non-negative in i32,
; so the folded value is the same as with an arithmetic shift:
; (65534 + 65535) >> 1 = 65534 = 0xfffe per lane, i.e. mvni #1.
69 define <8 x i16> @haddu_const_bothhigh() {
70 ; CHECK-LABEL: haddu_const_bothhigh:
72 ; CHECK-NEXT: mvni v0.8h, #1
74 %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
75 %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
76 %add = add <8 x i32> %ext1, %ext2
77 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
78 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Unsigned halving-add pattern where one operand is a zext of undef.
; The expected output contains no uhadd: ushll/ushll2 followed by shrn/shrn2 #1,
; the same sequence as the all-zero constant case.
82 define <8 x i16> @haddu_undef(<8 x i16> %src1) {
83 ; CHECK-LABEL: haddu_undef:
85 ; CHECK-NEXT: ushll v1.4s, v0.4h, #0
86 ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
87 ; CHECK-NEXT: shrn v0.4h, v1.4s, #1
88 ; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
90 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
91 %zextsrc2 = zext <8 x i16> undef to <8 x i32>
92 %add = add <8 x i32> %zextsrc2, %zextsrc1
93 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
94 %result = trunc <8 x i32> %resulti16 to <8 x i16>
; Direct call to the uhadd intrinsic with two variable operands.
; Expected to select a single uhadd.
100 define <8 x i16> @haddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
101 ; CHECK-LABEL: haddu_i_base:
103 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
105 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
106 ret <8 x i16> %result
; uhadd intrinsic with a splat-1 constant as the second operand.
; Expected to materialise the constant (movi #1) and use uhadd.
109 define <8 x i16> @haddu_i_const(<8 x i16> %src1) {
110 ; CHECK-LABEL: haddu_i_const:
112 ; CHECK-NEXT: movi v1.8h, #1
113 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
115 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
116 ret <8 x i16> %result
; uhadd intrinsic with the splat-1 constant as the first operand.
; The expected output places the constant in v1 and keeps %src1 first.
119 define <8 x i16> @haddu_i_const_lhs(<8 x i16> %src1) {
120 ; CHECK-LABEL: haddu_i_const_lhs:
122 ; CHECK-NEXT: movi v1.8h, #1
123 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
125 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
126 ret <8 x i16> %result
; uhadd intrinsic with an all-zero first operand.
; Expected to simplify to a logical shift right by one (ushr #1).
129 define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
130 ; CHECK-LABEL: haddu_i_const_zero:
132 ; CHECK-NEXT: ushr v0.8h, v0.8h, #1
134 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
135 ret <8 x i16> %result
; uhadd intrinsic with both operands constant (splat 3 and splat 1).
; Expected to constant-fold to splat 2: (3 + 1) >> 1 = 2.
138 define <8 x i16> @haddu_i_const_both() {
139 ; CHECK-LABEL: haddu_i_const_both:
141 ; CHECK-NEXT: movi v0.8h, #2
143 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
144 ret <8 x i16> %result
; uhadd intrinsic with two large constant splats (65534 and 65535).
; Expected to fold to 0xfffe per lane (mvni #1): (65534 + 65535) >> 1 = 65534.
147 define <8 x i16> @haddu_i_const_bothhigh() {
148 ; CHECK-LABEL: haddu_i_const_bothhigh:
150 ; CHECK-NEXT: mvni v0.8h, #1
152 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
153 ret <8 x i16> %result
; uhadd intrinsic with an undef first operand. The expected output has no
; uhadd, only a register move of v1 into v0 (presumably %src1, the second
; argument; %t is otherwise unused).
156 define <8 x i16> @haddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
157 ; CHECK-LABEL: haddu_i_undef:
159 ; CHECK-NEXT: mov v0.16b, v1.16b
161 %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
162 ret <8 x i16> %result
; Signed halving add written as plain IR: sext both i16 vectors to i32, add,
; ashr by 1, trunc back to i16. Expected to select a single shadd.
; Note: the values named %zextsrc* are produced by sext here.
169 define <8 x i16> @hadds_base(<8 x i16> %src1, <8 x i16> %src2) {
170 ; CHECK-LABEL: hadds_base:
172 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
174 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
175 %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
176 %add = add <8 x i32> %zextsrc1, %zextsrc2
177 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
178 %result = trunc <8 x i32> %resulti16 to <8 x i16>
179 ret <8 x i16> %result
; Signed halving-add pattern with a splat-1 constant as the right-hand add
; operand. Expected to materialise the constant (movi #1) and use shadd.
; Note: %zextsrc1 is produced by sext here.
182 define <8 x i16> @hadds_const(<8 x i16> %src1) {
183 ; CHECK-LABEL: hadds_const:
185 ; CHECK-NEXT: movi v1.8h, #1
186 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
188 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
189 %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
190 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
191 %result = trunc <8 x i32> %resulti16 to <8 x i16>
192 ret <8 x i16> %result
; Signed halving-add pattern with the splat-1 constant written as the
; left-hand add operand. The expected output is the same movi + shadd pair.
; Note: %zextsrc1 is produced by sext here.
195 define <8 x i16> @hadds_const_lhs(<8 x i16> %src1) {
196 ; CHECK-LABEL: hadds_const_lhs:
198 ; CHECK-NEXT: movi v1.8h, #1
199 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
201 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
202 %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
203 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
204 %result = trunc <8 x i32> %resulti16 to <8 x i16>
205 ret <8 x i16> %result
; Signed halving-add pattern where the constant operand is all zeros.
; The expected output contains no shadd: it widens with sshll/sshll2 and
; narrows with shrn/shrn2 #1.
; Note: %zextsrc1 is produced by sext here.
208 define <8 x i16> @hadds_const_zero(<8 x i16> %src1) {
209 ; CHECK-LABEL: hadds_const_zero:
211 ; CHECK-NEXT: sshll v1.4s, v0.4h, #0
212 ; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
213 ; CHECK-NEXT: shrn v0.4h, v1.4s, #1
214 ; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
216 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
217 %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
218 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
219 %result = trunc <8 x i32> %resulti16 to <8 x i16>
220 ret <8 x i16> %result
; Signed variant with both add operands constant (splat 3 and splat 1).
; Expected to fold to a single movi #2: (3 + 1) >> 1 = 2.
223 define <8 x i16> @hadds_const_both() {
224 ; CHECK-LABEL: hadds_const_both:
226 ; CHECK-NEXT: movi v0.8h, #2
228 %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
229 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
230 %result = trunc <8 x i32> %resulti16 to <8 x i16>
231 ret <8 x i16> %result
; Signed halving add of two large positive constant splats (32766 and 32767).
; Expected to fold to 0x7ffe per lane: (32766 + 32767) >> 1 = 32766,
; materialised with a scalar mov followed by dup.
234 define <8 x i16> @hadds_const_bothhigh() {
235 ; CHECK-LABEL: hadds_const_bothhigh:
237 ; CHECK-NEXT: mov w8, #32766 // =0x7ffe
238 ; CHECK-NEXT: dup v0.8h, w8
240 %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
241 %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
242 %add = add <8 x i32> %ext1, %ext2
243 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
244 %result = trunc <8 x i32> %resulti16 to <8 x i16>
245 ret <8 x i16> %result
; Signed halving-add pattern where one operand is a sext of undef.
; The expected output contains no shadd: sshll/sshll2 followed by shrn/shrn2 #1,
; the same sequence as the all-zero constant case.
; Note: the values named %zextsrc* are produced by sext here.
248 define <8 x i16> @hadds_undef(<8 x i16> %src1) {
249 ; CHECK-LABEL: hadds_undef:
251 ; CHECK-NEXT: sshll v1.4s, v0.4h, #0
252 ; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
253 ; CHECK-NEXT: shrn v0.4h, v1.4s, #1
254 ; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
256 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
257 %zextsrc2 = sext <8 x i16> undef to <8 x i32>
258 %add = add <8 x i32> %zextsrc2, %zextsrc1
259 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
260 %result = trunc <8 x i32> %resulti16 to <8 x i16>
261 ret <8 x i16> %result
; Direct call to the shadd intrinsic with two variable operands.
; Expected to select a single shadd.
266 define <8 x i16> @hadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
267 ; CHECK-LABEL: hadds_i_base:
269 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
271 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
272 ret <8 x i16> %result
; shadd intrinsic with a splat-1 constant as the second operand.
; Expected to materialise the constant (movi #1) and use shadd.
275 define <8 x i16> @hadds_i_const(<8 x i16> %src1) {
276 ; CHECK-LABEL: hadds_i_const:
278 ; CHECK-NEXT: movi v1.8h, #1
279 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
281 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
282 ret <8 x i16> %result
; shadd intrinsic with the splat-1 constant as the first operand.
; The expected output places the constant in v1 and keeps %src1 first.
285 define <8 x i16> @hadds_i_const_lhs(<8 x i16> %src1) {
286 ; CHECK-LABEL: hadds_i_const_lhs:
288 ; CHECK-NEXT: movi v1.8h, #1
289 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
291 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
292 ret <8 x i16> %result
; shadd intrinsic with an all-zero first operand.
; Expected to simplify to an arithmetic shift right by one (sshr #1).
295 define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
296 ; CHECK-LABEL: hadds_i_const_zero:
298 ; CHECK-NEXT: sshr v0.8h, v0.8h, #1
300 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
301 ret <8 x i16> %result
; shadd intrinsic with both operands constant (splat 3 and splat 1).
; Expected to constant-fold to splat 2: (3 + 1) >> 1 = 2.
304 define <8 x i16> @hadds_i_const_both() {
305 ; CHECK-LABEL: hadds_i_const_both:
307 ; CHECK-NEXT: movi v0.8h, #2
309 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
310 ret <8 x i16> %result
; shadd intrinsic with two large positive constant splats (32766 and 32767).
; Expected to fold to 0x7ffe per lane: (32766 + 32767) >> 1 = 32766.
313 define <8 x i16> @hadds_i_const_bothhigh() {
314 ; CHECK-LABEL: hadds_i_const_bothhigh:
316 ; CHECK-NEXT: mov w8, #32766 // =0x7ffe
317 ; CHECK-NEXT: dup v0.8h, w8
319 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
320 ret <8 x i16> %result
; shadd intrinsic with an undef first operand. The expected output has no
; shadd, only a register move of v1 into v0 (presumably %src1, the second
; argument; %t is otherwise unused).
323 define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
324 ; CHECK-LABEL: hadds_i_undef:
326 ; CHECK-NEXT: mov v0.16b, v1.16b
328 %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
329 ret <8 x i16> %result
; Unsigned rounding halving add written as plain IR: zext both operands to
; i32, add them, add 1, lshr by 1, trunc back to i16.
; Expected to select a single urhadd.
336 define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
337 ; CHECK-LABEL: rhaddu_base:
339 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
341 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
342 %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
343 %add1 = add <8 x i32> %zextsrc1, %zextsrc2
344 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
345 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
346 %result = trunc <8 x i32> %resulti16 to <8 x i16>
347 ret <8 x i16> %result
; Unsigned rounding halving-add pattern whose second operand is a splat-1
; constant (right-hand side of the first add), followed by the +1 rounding add.
; Expected to materialise the constant (movi #1) and use urhadd.
350 define <8 x i16> @rhaddu_const(<8 x i16> %src1) {
351 ; CHECK-LABEL: rhaddu_const:
353 ; CHECK-NEXT: movi v1.8h, #1
354 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
356 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
357 %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
358 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
359 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
360 %result = trunc <8 x i32> %resulti16 to <8 x i16>
361 ret <8 x i16> %result
; Unsigned rounding halving-add pattern with the splat-1 constant written as
; the left-hand operand of the first add.
; The expected output is the same movi + urhadd pair.
364 define <8 x i16> @rhaddu_const_lhs(<8 x i16> %src1) {
365 ; CHECK-LABEL: rhaddu_const_lhs:
367 ; CHECK-NEXT: movi v1.8h, #1
368 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
370 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
371 %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
372 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
373 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
374 %result = trunc <8 x i32> %resulti16 to <8 x i16>
375 ret <8 x i16> %result
; Unsigned rounding halving-add pattern where the constant operand is zero,
; leaving (zext(%src1) + 1) >> 1. The expected output is a non-rounding uhadd
; against a splat-1 constant rather than a urhadd.
378 define <8 x i16> @rhaddu_const_zero(<8 x i16> %src1) {
379 ; CHECK-LABEL: rhaddu_const_zero:
381 ; CHECK-NEXT: movi v1.8h, #1
382 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
384 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
385 %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
386 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
387 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
388 %result = trunc <8 x i32> %resulti16 to <8 x i16>
389 ret <8 x i16> %result
; Unsigned rounding variant with both operands constant (splat 3 and splat 1).
; Expected to fold to a single movi #2: (3 + 1 + 1) >> 1 = 2.
392 define <8 x i16> @rhaddu_const_both() {
393 ; CHECK-LABEL: rhaddu_const_both:
395 ; CHECK-NEXT: movi v0.8h, #2
397 %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
398 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
399 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
400 %result = trunc <8 x i32> %resulti16 to <8 x i16>
401 ret <8 x i16> %result
; Unsigned rounding halving add of two large constant splats (65534 and
; 65535), built from zext/add/add-1/shift/trunc. The shift is lshr, matching
; every other unsigned rhaddu_* pattern in this file. The zero-extended sum is
; non-negative in i32, so the folded value is the same as with an arithmetic
; shift: (65534 + 65535 + 1) >> 1 = 65535, i.e. all bits set in every lane.
404 define <8 x i16> @rhaddu_const_bothhigh() {
405 ; CHECK-LABEL: rhaddu_const_bothhigh:
407 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
409 %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
410 %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
411 %add1 = add <8 x i32> %ext1, %ext2
412 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
413 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
414 %result = trunc <8 x i32> %resulti16 to <8 x i16>
415 ret <8 x i16> %result
; Unsigned rounding halving-add pattern where one operand is a zext of undef.
; The expected output is a non-rounding uhadd against a splat-1 constant, the
; same sequence as the all-zero constant case.
418 define <8 x i16> @rhaddu_undef(<8 x i16> %src1) {
419 ; CHECK-LABEL: rhaddu_undef:
421 ; CHECK-NEXT: movi v1.8h, #1
422 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
424 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
425 %zextsrc2 = zext <8 x i16> undef to <8 x i32>
426 %add1 = add <8 x i32> %zextsrc2, %zextsrc1
427 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
428 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
429 %result = trunc <8 x i32> %resulti16 to <8 x i16>
430 ret <8 x i16> %result
; Direct call to the urhadd intrinsic with two variable operands.
; Expected to select a single urhadd.
435 define <8 x i16> @rhaddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
436 ; CHECK-LABEL: rhaddu_i_base:
438 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
440 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
441 ret <8 x i16> %result
; urhadd intrinsic with a splat-1 constant as the second operand.
; Expected to materialise the constant (movi #1) and use urhadd.
444 define <8 x i16> @rhaddu_i_const(<8 x i16> %src1) {
445 ; CHECK-LABEL: rhaddu_i_const:
447 ; CHECK-NEXT: movi v1.8h, #1
448 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
450 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
451 ret <8 x i16> %result
; urhadd intrinsic with the splat-1 constant as the first operand.
; The expected output places the constant in v1 and keeps %src1 first.
454 define <8 x i16> @rhaddu_i_const_lhs(<8 x i16> %src1) {
455 ; CHECK-LABEL: rhaddu_i_const_lhs:
457 ; CHECK-NEXT: movi v1.8h, #1
458 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
460 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
461 ret <8 x i16> %result
; urhadd intrinsic with an all-zero first operand. The expected output is not
; simplified: it materialises the zero vector and still issues urhadd.
464 define <8 x i16> @rhaddu_i_const_zero(<8 x i16> %src1) {
465 ; CHECK-LABEL: rhaddu_i_const_zero:
467 ; CHECK-NEXT: movi v1.2d, #0000000000000000
468 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
470 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
471 ret <8 x i16> %result
; urhadd intrinsic with both operands constant (splat 3 and splat 1).
; Expected to constant-fold to splat 2: (3 + 1 + 1) >> 1 = 2.
474 define <8 x i16> @rhaddu_i_const_both() {
475 ; CHECK-LABEL: rhaddu_i_const_both:
477 ; CHECK-NEXT: movi v0.8h, #2
479 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
480 ret <8 x i16> %result
; urhadd intrinsic with two large constant splats (65534 and 65535).
; Expected to fold to all-ones lanes: (65534 + 65535 + 1) >> 1 = 65535.
483 define <8 x i16> @rhaddu_i_const_bothhigh() {
484 ; CHECK-LABEL: rhaddu_i_const_bothhigh:
486 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
488 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
489 ret <8 x i16> %result
; urhadd intrinsic with an undef first operand. The expected output has no
; urhadd, only a register move of v1 into v0 (presumably %src1, the second
; argument; %t is otherwise unused).
492 define <8 x i16> @rhaddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
493 ; CHECK-LABEL: rhaddu_i_undef:
495 ; CHECK-NEXT: mov v0.16b, v1.16b
497 %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
498 ret <8 x i16> %result
; Signed rounding halving add written as plain IR: sext both operands to i32,
; add them, add 1, ashr by 1, trunc back to i16.
; Expected to select a single srhadd.
; Note: the values named %zextsrc* are produced by sext here.
505 define <8 x i16> @rhadds_base(<8 x i16> %src1, <8 x i16> %src2) {
506 ; CHECK-LABEL: rhadds_base:
508 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
510 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
511 %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
512 %add1 = add <8 x i32> %zextsrc1, %zextsrc2
513 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
514 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
515 %result = trunc <8 x i32> %resulti16 to <8 x i16>
516 ret <8 x i16> %result
; Signed rounding halving-add pattern whose second operand is a splat-1
; constant (right-hand side of the first add), followed by the +1 rounding add.
; Expected to materialise the constant (movi #1) and use srhadd.
; Note: %zextsrc1 is produced by sext here.
519 define <8 x i16> @rhadds_const(<8 x i16> %src1) {
520 ; CHECK-LABEL: rhadds_const:
522 ; CHECK-NEXT: movi v1.8h, #1
523 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
525 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
526 %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
527 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
528 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
529 %result = trunc <8 x i32> %resulti16 to <8 x i16>
530 ret <8 x i16> %result
; Signed rounding halving-add pattern with the splat-1 constant written as the
; left-hand operand of the first add.
; The expected output is the same movi + srhadd pair.
; Note: %zextsrc1 is produced by sext here.
533 define <8 x i16> @rhadds_const_lhs(<8 x i16> %src1) {
534 ; CHECK-LABEL: rhadds_const_lhs:
536 ; CHECK-NEXT: movi v1.8h, #1
537 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
539 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
540 %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
541 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
542 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
543 %result = trunc <8 x i32> %resulti16 to <8 x i16>
544 ret <8 x i16> %result
; Signed rounding halving-add pattern where the constant operand is zero,
; leaving (sext(%src1) + 1) >> 1. The expected output is a non-rounding shadd
; against a splat-1 constant rather than an srhadd.
; Note: %zextsrc1 is produced by sext here.
547 define <8 x i16> @rhadds_const_zero(<8 x i16> %src1) {
548 ; CHECK-LABEL: rhadds_const_zero:
550 ; CHECK-NEXT: movi v1.8h, #1
551 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
553 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
554 %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
555 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
556 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
557 %result = trunc <8 x i32> %resulti16 to <8 x i16>
558 ret <8 x i16> %result
; Signed rounding variant with both operands constant (splat 3 and splat 1).
; Expected to fold to a single movi #2: (3 + 1 + 1) >> 1 = 2.
561 define <8 x i16> @rhadds_const_both() {
562 ; CHECK-LABEL: rhadds_const_both:
564 ; CHECK-NEXT: movi v0.8h, #2
566 %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
567 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
568 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
569 %result = trunc <8 x i32> %resulti16 to <8 x i16>
570 ret <8 x i16> %result
; Signed rounding halving add of two large positive constant splats (32766 and
; 32767). Expected to fold to 0x7fff per lane: (32766 + 32767 + 1) >> 1 = 32767,
; materialised as the bitwise NOT of 0x8000 (mvni #128, lsl #8).
573 define <8 x i16> @rhadds_const_bothhigh() {
574 ; CHECK-LABEL: rhadds_const_bothhigh:
576 ; CHECK-NEXT: mvni v0.8h, #128, lsl #8
578 %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
579 %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
580 %add1 = add <8 x i32> %ext1, %ext2
581 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
582 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
583 %result = trunc <8 x i32> %resulti16 to <8 x i16>
584 ret <8 x i16> %result
; Signed rounding halving-add pattern where one operand is a sext of undef.
; The expected output is a non-rounding shadd against a splat-1 constant, the
; same sequence as the all-zero constant case.
; Note: the values named %zextsrc* are produced by sext here.
587 define <8 x i16> @rhadds_undef(<8 x i16> %src1) {
588 ; CHECK-LABEL: rhadds_undef:
590 ; CHECK-NEXT: movi v1.8h, #1
591 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
593 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
594 %zextsrc2 = sext <8 x i16> undef to <8 x i32>
595 %add1 = add <8 x i32> %zextsrc2, %zextsrc1
596 %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
597 %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
598 %result = trunc <8 x i32> %resulti16 to <8 x i16>
599 ret <8 x i16> %result
; Direct call to the srhadd intrinsic with two variable operands.
; Expected to select a single srhadd.
604 define <8 x i16> @rhadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
605 ; CHECK-LABEL: rhadds_i_base:
607 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
609 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
610 ret <8 x i16> %result
; srhadd intrinsic with a splat-1 constant as the second operand.
; Expected to materialise the constant (movi #1) and use srhadd.
613 define <8 x i16> @rhadds_i_const(<8 x i16> %src1) {
614 ; CHECK-LABEL: rhadds_i_const:
616 ; CHECK-NEXT: movi v1.8h, #1
617 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
619 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
620 ret <8 x i16> %result
; srhadd intrinsic with the splat-1 constant as the first operand.
; The expected output places the constant in v1 and keeps %src1 first.
623 define <8 x i16> @rhadds_i_const_lhs(<8 x i16> %src1) {
624 ; CHECK-LABEL: rhadds_i_const_lhs:
626 ; CHECK-NEXT: movi v1.8h, #1
627 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
629 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
630 ret <8 x i16> %result
; srhadd intrinsic with an all-zero first operand. The expected output is not
; simplified: it materialises the zero vector and still issues srhadd.
633 define <8 x i16> @rhadds_i_const_zero(<8 x i16> %src1) {
634 ; CHECK-LABEL: rhadds_i_const_zero:
636 ; CHECK-NEXT: movi v1.2d, #0000000000000000
637 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
639 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
640 ret <8 x i16> %result
; srhadd intrinsic with both operands constant (splat 3 and splat 1).
; Expected to constant-fold to splat 2: (3 + 1 + 1) >> 1 = 2.
643 define <8 x i16> @rhadds_i_const_both() {
644 ; CHECK-LABEL: rhadds_i_const_both:
646 ; CHECK-NEXT: movi v0.8h, #2
648 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
649 ret <8 x i16> %result
; srhadd intrinsic with two large positive constant splats (32766 and 32767).
; Expected to fold to 0x7fff per lane: (32766 + 32767 + 1) >> 1 = 32767.
652 define <8 x i16> @rhadds_i_const_bothhigh() {
653 ; CHECK-LABEL: rhadds_i_const_bothhigh:
655 ; CHECK-NEXT: mvni v0.8h, #128, lsl #8
657 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
658 ret <8 x i16> %result
; srhadd intrinsic with an undef first operand. The expected output has no
; srhadd, only a register move of v1 into v0 (presumably %src1, the second
; argument; %t is otherwise unused).
661 define <8 x i16> @rhadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
662 ; CHECK-LABEL: rhadds_i_undef:
664 ; CHECK-NEXT: mov v0.16b, v1.16b
666 %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
667 ret <8 x i16> %result
; shadd intrinsic called with %x as both operands, 8 x i8 variant.
671 define <8 x i8> @shadd_v8i8(<8 x i8> %x) {
672 ; CHECK-LABEL: shadd_v8i8:
675 %r = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %x, <8 x i8> %x)
; shadd intrinsic called with %x as both operands, 4 x i16 variant.
679 define <4 x i16> @shadd_v4i16(<4 x i16> %x) {
680 ; CHECK-LABEL: shadd_v4i16:
683 %r = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %x, <4 x i16> %x)
; shadd intrinsic called with %x as both operands, 2 x i32 variant.
687 define <2 x i32> @shadd_v2i32(<2 x i32> %x) {
688 ; CHECK-LABEL: shadd_v2i32:
691 %r = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %x, <2 x i32> %x)
; shadd intrinsic called with %x as both operands, 16 x i8 variant.
695 define <16 x i8> @shadd_v16i8(<16 x i8> %x) {
696 ; CHECK-LABEL: shadd_v16i8:
699 %r = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %x, <16 x i8> %x)
; shadd intrinsic called with %x as both operands, 8 x i16 variant.
703 define <8 x i16> @shadd_v8i16(<8 x i16> %x) {
704 ; CHECK-LABEL: shadd_v8i16:
707 %r = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x, <8 x i16> %x)
; shadd intrinsic called with %x as both operands, 4 x i32 variant.
711 define <4 x i32> @shadd_v4i32(<4 x i32> %x) {
712 ; CHECK-LABEL: shadd_v4i32:
715 %r = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %x, <4 x i32> %x)
; uhadd intrinsic called with %x as both operands, 8 x i8 variant.
719 define <8 x i8> @uhadd_v8i8(<8 x i8> %x) {
720 ; CHECK-LABEL: uhadd_v8i8:
723 %r = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
; uhadd intrinsic called with %x as both operands, 4 x i16 variant.
727 define <4 x i16> @uhadd_v4i16(<4 x i16> %x) {
728 ; CHECK-LABEL: uhadd_v4i16:
731 %r = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
; uhadd intrinsic called with %x as both operands, 2 x i32 variant.
735 define <2 x i32> @uhadd_v2i32(<2 x i32> %x) {
736 ; CHECK-LABEL: uhadd_v2i32:
739 %r = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
; uhadd intrinsic called with %x as both operands, 16 x i8 variant.
743 define <16 x i8> @uhadd_v16i8(<16 x i8> %x) {
744 ; CHECK-LABEL: uhadd_v16i8:
747 %r = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
; uhadd intrinsic called with %x as both operands, 8 x i16 variant.
751 define <8 x i16> @uhadd_v8i16(<8 x i16> %x) {
752 ; CHECK-LABEL: uhadd_v8i16:
755 %r = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
; uhadd intrinsic called with %x as both operands, 4 x i32 variant.
759 define <4 x i32> @uhadd_v4i32(<4 x i32> %x) {
760 ; CHECK-LABEL: uhadd_v4i32:
763 %r = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
; srhadd intrinsic called with %x as both operands, 8 x i8 variant.
766 define <8 x i8> @srhadd_v8i8(<8 x i8> %x) {
767 ; CHECK-LABEL: srhadd_v8i8:
770 %r = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
; srhadd intrinsic called with %x as both operands, 4 x i16 variant.
774 define <4 x i16> @srhadd_v4i16(<4 x i16> %x) {
775 ; CHECK-LABEL: srhadd_v4i16:
778 %r = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
; srhadd intrinsic called with %x as both operands, 2 x i32 variant.
782 define <2 x i32> @srhadd_v2i32(<2 x i32> %x) {
783 ; CHECK-LABEL: srhadd_v2i32:
786 %r = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
; srhadd intrinsic called with %x as both operands, 16 x i8 variant.
790 define <16 x i8> @srhadd_v16i8(<16 x i8> %x) {
791 ; CHECK-LABEL: srhadd_v16i8:
794 %r = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
; srhadd intrinsic called with %x as both operands, 8 x i16 variant.
798 define <8 x i16> @srhadd_v8i16(<8 x i16> %x) {
799 ; CHECK-LABEL: srhadd_v8i16:
802 %r = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
; srhadd intrinsic called with %x as both operands, 4 x i32 variant.
806 define <4 x i32> @srhadd_v4i32(<4 x i32> %x) {
807 ; CHECK-LABEL: srhadd_v4i32:
810 %r = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
; urhadd intrinsic called with %x as both operands, 8 x i8 variant.
814 define <8 x i8> @urhadd_v8i8(<8 x i8> %x) {
815 ; CHECK-LABEL: urhadd_v8i8:
818 %r = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
; urhadd intrinsic called with %x as both operands, 4 x i16 variant.
822 define <4 x i16> @urhadd_v4i16(<4 x i16> %x) {
823 ; CHECK-LABEL: urhadd_v4i16:
826 %r = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
; urhadd intrinsic called with %x as both operands, 2 x i32 variant.
830 define <2 x i32> @urhadd_v2i32(<2 x i32> %x) {
831 ; CHECK-LABEL: urhadd_v2i32:
834 %r = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
; urhadd intrinsic called with %x as both operands, 16 x i8 variant.
838 define <16 x i8> @urhadd_v16i8(<16 x i8> %x) {
839 ; CHECK-LABEL: urhadd_v16i8:
842 %r = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
; urhadd intrinsic called with %x as both operands, 8 x i16 variant.
846 define <8 x i16> @urhadd_v8i16(<8 x i16> %x) {
847 ; CHECK-LABEL: urhadd_v8i16:
850 %r = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
; urhadd intrinsic called with %x as both operands, 4 x i32 variant.
854 define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
855 ; CHECK-LABEL: urhadd_v4i32:
858 %r = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
; Declarations of the AArch64 NEON halving-add intrinsics called by the tests
; above. First group: non-rounding shadd/uhadd; second group: rounding
; srhadd/urhadd. Each is declared for the 64-bit vector types (v8i8, v4i16,
; v2i32) and the 128-bit vector types (v16i8, v8i16, v4i32).
862 declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
863 declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
864 declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
865 declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
866 declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
867 declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
868 declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
869 declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
870 declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
871 declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
872 declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
873 declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)
; Rounding variants.
875 declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
876 declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
877 declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
878 declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
879 declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
880 declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
881 declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
882 declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
883 declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
884 declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
885 declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
886 declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)