; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
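
; The tests below check that the widen/add/shift/narrow pattern
; (trunc (lshr (add (zext x), (zext y)), 1)) is selected as uhadd,
; including constant, zero and undef operand variations.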
define <8 x i16> @haddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_base:
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_zero:
; CHECK-NEXT: ushll v1.4s, v0.4h, #0
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT: shrn v0.4h, v1.4s, #1
; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_both() {
; CHECK-LABEL: haddu_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_bothhigh() {
; CHECK-LABEL: haddu_const_bothhigh:
; CHECK-NEXT: mvni v0.8h, #1
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: haddu_undef:
; CHECK-NEXT: ushll v1.4s, v0.4h, #0
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT: shrn v0.4h, v1.4s, #1
; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
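
; The same cases using the @llvm.aarch64.neon.uhadd intrinsic directly.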
define <8 x i16> @haddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_i_base:
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_zero:
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_both() {
; CHECK-LABEL: haddu_i_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_bothhigh() {
; CHECK-LABEL: haddu_i_const_bothhigh:
; CHECK-NEXT: mvni v0.8h, #1
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: haddu_i_undef:
; CHECK-NEXT: mov v0.16b, v1.16b
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
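
; Signed variants: the sext/add/ashr/trunc pattern should select shadd.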
define <8 x i16> @hadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_base:
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_zero:
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT: shrn v0.4h, v1.4s, #1
; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_both() {
; CHECK-LABEL: hadds_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_bothhigh() {
; CHECK-LABEL: hadds_const_bothhigh:
; CHECK-NEXT: mov w8, #32766 // =0x7ffe
; CHECK-NEXT: dup v0.8h, w8
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: hadds_undef:
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT: shrn v0.4h, v1.4s, #1
; CHECK-NEXT: shrn2 v0.8h, v2.4s, #1
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @hadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_i_base:
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_zero:
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_both() {
; CHECK-LABEL: hadds_i_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_bothhigh() {
; CHECK-LABEL: hadds_i_const_bothhigh:
; CHECK-NEXT: mov w8, #32766 // =0x7ffe
; CHECK-NEXT: dup v0.8h, w8
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: hadds_i_undef:
; CHECK-NEXT: mov v0.16b, v1.16b
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
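
; (or x, y) - ((xor x, y) >> 1) is the rounding halving add; expect urhadd/srhadd.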
define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: sub_fixedwidth_v4i32:
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}

define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_fixedwidth_v8i16:
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}
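
; Rounding variants of the widening pattern: zext/sext, add, add 1, shift by 1,
; then truncate should select urhadd/srhadd.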
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_base:
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_zero:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_both() {
; CHECK-LABEL: rhaddu_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_bothhigh() {
; CHECK-LABEL: rhaddu_const_bothhigh:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_undef:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhaddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_i_base:
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_zero:
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_both() {
; CHECK-LABEL: rhaddu_i_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_bothhigh() {
; CHECK-LABEL: rhaddu_i_const_bothhigh:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_undef:
; CHECK-NEXT: mov v0.16b, v1.16b
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_base:
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_zero:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_both() {
; CHECK-LABEL: rhadds_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_bothhigh() {
; CHECK-LABEL: rhadds_const_bothhigh:
; CHECK-NEXT: mvni v0.8h, #128, lsl #8
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_undef:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_i_base:
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_lhs:
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_zero:
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_both() {
; CHECK-LABEL: rhadds_i_const_both:
; CHECK-NEXT: movi v0.8h, #2
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_bothhigh() {
; CHECK-LABEL: rhadds_i_const_bothhigh:
; CHECK-NEXT: mvni v0.8h, #128, lsl #8
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_undef:
; CHECK-NEXT: mov v0.16b, v1.16b
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
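
; hadd/rhadd intrinsics where both operands are the same value.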
define <8 x i8> @shadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: shadd_v8i8:
  %r = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @shadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shadd_v4i16:
  %r = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @shadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: shadd_v2i32:
  %r = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @shadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: shadd_v16i8:
  %r = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @shadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: shadd_v8i16:
  %r = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @shadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: shadd_v4i32:
  %r = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i8> @uhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: uhadd_v8i8:
  %r = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @uhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: uhadd_v4i16:
  %r = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @uhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: uhadd_v2i32:
  %r = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @uhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: uhadd_v16i8:
  %r = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @uhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: uhadd_v8i16:
  %r = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @uhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: uhadd_v4i32:
  %r = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i8> @srhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: srhadd_v8i8:
  %r = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @srhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: srhadd_v4i16:
  %r = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @srhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: srhadd_v2i32:
  %r = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @srhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: srhadd_v16i8:
  %r = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @srhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: srhadd_v8i16:
  %r = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @srhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: srhadd_v4i32:
  %r = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i8> @urhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: urhadd_v8i8:
  %r = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @urhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: urhadd_v4i16:
  %r = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @urhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: urhadd_v2i32:
  %r = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @urhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: urhadd_v16i8:
  %r = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @urhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: urhadd_v8i16:
  %r = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: urhadd_v4i32:
  %r = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}
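
; (and x, y) + ((xor x, y) >> 1) is the non-rounding halving add; expect uhadd/shadd.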
define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_fixedwidth_v4i32:
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}

define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_fixedwidth_v8i16:
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}
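
; Only lane 0 of the hadd result is demanded: the first operand and the result
; are both splats of element 0.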
define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_demandedelts:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[0]
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_demandedelts:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[0]
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_demandedelts:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[0]
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: urhadd_demandedelts:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[0]
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}
; Remove unnecessary sign_extend_inreg after shadd
define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: shadd_signbits_v2i32:
; CHECK-NEXT: sshr v0.2s, v0.2s, #17
; CHECK-NEXT: sshr v1.2s, v1.2s, #17
; CHECK-NEXT: shadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: str d0, [x0]
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = and <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = add <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}

; Remove unnecessary sign_extend_inreg after srhadd
define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: srhadd_signbits_v2i32:
; CHECK-NEXT: sshr v0.2s, v0.2s, #17
; CHECK-NEXT: sshr v1.2s, v1.2s, #17
; CHECK-NEXT: srhadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: str d0, [x0]
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = or <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = sub <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}

; negative test - not enough signbits to remove sign_extend_inreg after srhadd
define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: srhadd_signbits_v2i32_negative:
; CHECK-NEXT: sshr v0.2s, v0.2s, #17
; CHECK-NEXT: sshr v1.2s, v1.2s, #17
; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v0.2s, v1.2s, #22
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: sshr v0.2s, v0.2s, #22
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = or <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = sub <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
  %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)

declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)