1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2
; Signed halving add, nxv2i64: trunc((sext(s0) + sext(s1)) ashr 1) via i128.
; Checks: SVE expands with eor/and/asr/add; SVE2 selects SHADD.
5 define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
6 ; SVE-LABEL: hadds_v2i64:
7 ; SVE: // %bb.0: // %entry
8 ; SVE-NEXT: eor z2.d, z0.d, z1.d
9 ; SVE-NEXT: and z0.d, z0.d, z1.d
10 ; SVE-NEXT: asr z1.d, z2.d, #1
11 ; SVE-NEXT: add z0.d, z0.d, z1.d
14 ; SVE2-LABEL: hadds_v2i64:
15 ; SVE2: // %bb.0: // %entry
16 ; SVE2-NEXT: ptrue p0.d
17 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
20 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
21 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
22 %m = add nsw <vscale x 2 x i128> %s0s, %s1s
23 %s = ashr <vscale x 2 x i128> %m, splat (i128 1)
24 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
25 ret <vscale x 2 x i64> %s2
; Same as hadds_v2i64 but the narrowing shift is lshr of the widened sum;
; checks show codegen is identical (SVE expansion / SVE2 SHADD).
28 define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
29 ; SVE-LABEL: hadds_v2i64_lsh:
30 ; SVE: // %bb.0: // %entry
31 ; SVE-NEXT: eor z2.d, z0.d, z1.d
32 ; SVE-NEXT: and z0.d, z0.d, z1.d
33 ; SVE-NEXT: asr z1.d, z2.d, #1
34 ; SVE-NEXT: add z0.d, z0.d, z1.d
37 ; SVE2-LABEL: hadds_v2i64_lsh:
38 ; SVE2: // %bb.0: // %entry
39 ; SVE2-NEXT: ptrue p0.d
40 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
43 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
44 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
45 %m = add nsw <vscale x 2 x i128> %s0s, %s1s
46 %s = lshr <vscale x 2 x i128> %m, splat (i128 1)
47 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
48 ret <vscale x 2 x i64> %s2
; Unsigned halving add, nxv2i64: trunc((zext(s0) + zext(s1)) lshr 1).
; Checks: SVE expands with eor/and/lsr/add; SVE2 selects UHADD.
51 define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
52 ; SVE-LABEL: haddu_v2i64:
53 ; SVE: // %bb.0: // %entry
54 ; SVE-NEXT: eor z2.d, z0.d, z1.d
55 ; SVE-NEXT: and z0.d, z0.d, z1.d
56 ; SVE-NEXT: lsr z1.d, z2.d, #1
57 ; SVE-NEXT: add z0.d, z0.d, z1.d
60 ; SVE2-LABEL: haddu_v2i64:
61 ; SVE2: // %bb.0: // %entry
62 ; SVE2-NEXT: ptrue p0.d
63 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
66 %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
67 %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
68 %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
69 %s = lshr <vscale x 2 x i128> %m, splat (i128 1)
70 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
71 ret <vscale x 2 x i64> %s2
; Signed halving add on an unpacked nxv2i32 (i32 lanes in .d elements).
; Checks: SVE sign-extends and uses adr+asr; SVE2 sign-extends then SHADD.
74 define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
75 ; SVE-LABEL: hadds_v2i32:
76 ; SVE: // %bb.0: // %entry
77 ; SVE-NEXT: ptrue p0.d
78 ; SVE-NEXT: sxtw z0.d, p0/m, z0.d
79 ; SVE-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
80 ; SVE-NEXT: asr z0.d, z0.d, #1
83 ; SVE2-LABEL: hadds_v2i32:
84 ; SVE2: // %bb.0: // %entry
85 ; SVE2-NEXT: ptrue p0.d
86 ; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
87 ; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
88 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
91 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
92 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
93 %m = add nsw <vscale x 2 x i64> %s0s, %s1s
94 %s = ashr <vscale x 2 x i64> %m, splat (i64 1)
95 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
96 ret <vscale x 2 x i32> %s2
; lshr variant on unpacked nxv2i32: no hadd is formed on either target
; (common CHECK prefix) — sxtw + adr + lsr instead.
99 define <vscale x 2 x i32> @hadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
100 ; CHECK-LABEL: hadds_v2i32_lsh:
101 ; CHECK: // %bb.0: // %entry
102 ; CHECK-NEXT: ptrue p0.d
103 ; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
104 ; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
105 ; CHECK-NEXT: lsr z0.d, z0.d, #1
108 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
109 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
110 %m = add nsw <vscale x 2 x i64> %s0s, %s1s
111 %s = lshr <vscale x 2 x i64> %m, splat (i64 1)
112 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
113 ret <vscale x 2 x i32> %s2
; Unsigned halving add on unpacked nxv2i32. Checks: SVE masks to 32 bits and
; uses adr+lsr; SVE2 masks both operands then selects UHADD.
116 define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
117 ; SVE-LABEL: haddu_v2i32:
118 ; SVE: // %bb.0: // %entry
119 ; SVE-NEXT: and z0.d, z0.d, #0xffffffff
120 ; SVE-NEXT: adr z0.d, [z0.d, z1.d, uxtw]
121 ; SVE-NEXT: lsr z0.d, z0.d, #1
124 ; SVE2-LABEL: haddu_v2i32:
125 ; SVE2: // %bb.0: // %entry
126 ; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
127 ; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
128 ; SVE2-NEXT: ptrue p0.d
129 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
132 %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
133 %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
134 %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
135 %s = lshr <vscale x 2 x i64> %m, splat (i64 1)
136 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
137 ret <vscale x 2 x i32> %s2
; Signed halving add, packed nxv4i32 widened to i64. SVE expands with
; eor/and/asr/add; SVE2 selects SHADD on .s elements.
140 define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
141 ; SVE-LABEL: hadds_v4i32:
142 ; SVE: // %bb.0: // %entry
143 ; SVE-NEXT: eor z2.d, z0.d, z1.d
144 ; SVE-NEXT: and z0.d, z0.d, z1.d
145 ; SVE-NEXT: asr z1.s, z2.s, #1
146 ; SVE-NEXT: add z0.s, z0.s, z1.s
149 ; SVE2-LABEL: hadds_v4i32:
150 ; SVE2: // %bb.0: // %entry
151 ; SVE2-NEXT: ptrue p0.s
152 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
155 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
156 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
157 %m = add nsw <vscale x 4 x i64> %s0s, %s1s
158 %s = ashr <vscale x 4 x i64> %m, splat (i64 1)
159 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
160 ret <vscale x 4 x i32> %s2
; lshr variant of hadds_v4i32; checks show identical codegen to the ashr
; form on both targets.
163 define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
164 ; SVE-LABEL: hadds_v4i32_lsh:
165 ; SVE: // %bb.0: // %entry
166 ; SVE-NEXT: eor z2.d, z0.d, z1.d
167 ; SVE-NEXT: and z0.d, z0.d, z1.d
168 ; SVE-NEXT: asr z1.s, z2.s, #1
169 ; SVE-NEXT: add z0.s, z0.s, z1.s
172 ; SVE2-LABEL: hadds_v4i32_lsh:
173 ; SVE2: // %bb.0: // %entry
174 ; SVE2-NEXT: ptrue p0.s
175 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
178 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
179 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
180 %m = add nsw <vscale x 4 x i64> %s0s, %s1s
181 %s = lshr <vscale x 4 x i64> %m, splat (i64 1)
182 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
183 ret <vscale x 4 x i32> %s2
; Unsigned halving add, packed nxv4i32. SVE expands with eor/and/lsr/add;
; SVE2 selects UHADD on .s elements.
186 define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
187 ; SVE-LABEL: haddu_v4i32:
188 ; SVE: // %bb.0: // %entry
189 ; SVE-NEXT: eor z2.d, z0.d, z1.d
190 ; SVE-NEXT: and z0.d, z0.d, z1.d
191 ; SVE-NEXT: lsr z1.s, z2.s, #1
192 ; SVE-NEXT: add z0.s, z0.s, z1.s
195 ; SVE2-LABEL: haddu_v4i32:
196 ; SVE2: // %bb.0: // %entry
197 ; SVE2-NEXT: ptrue p0.s
198 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
201 %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
202 %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
203 %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
204 %s = lshr <vscale x 4 x i64> %m, splat (i64 1)
205 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
206 ret <vscale x 4 x i32> %s2
; Signed halving add on unpacked nxv2i16 (i16 lanes in .d elements).
; Checks: SVE does sxth+add+asr; SVE2 sign-extends then SHADD.
209 define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
210 ; SVE-LABEL: hadds_v2i16:
211 ; SVE: // %bb.0: // %entry
212 ; SVE-NEXT: ptrue p0.d
213 ; SVE-NEXT: sxth z1.d, p0/m, z1.d
214 ; SVE-NEXT: sxth z0.d, p0/m, z0.d
215 ; SVE-NEXT: add z0.d, z0.d, z1.d
216 ; SVE-NEXT: asr z0.d, z0.d, #1
219 ; SVE2-LABEL: hadds_v2i16:
220 ; SVE2: // %bb.0: // %entry
221 ; SVE2-NEXT: ptrue p0.d
222 ; SVE2-NEXT: sxth z1.d, p0/m, z1.d
223 ; SVE2-NEXT: sxth z0.d, p0/m, z0.d
224 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
227 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
228 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
229 %m = add nsw <vscale x 2 x i32> %s0s, %s1s
230 %s = ashr <vscale x 2 x i32> %m, splat (i32 1)
231 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
232 ret <vscale x 2 x i16> %s2
; lshr variant on unpacked nxv2i16: no hadd formed (common CHECK). The
; and #0xffffffff keeps only the i32-wide sum before the lane-wide lsr.
235 define <vscale x 2 x i16> @hadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
236 ; CHECK-LABEL: hadds_v2i16_lsh:
237 ; CHECK: // %bb.0: // %entry
238 ; CHECK-NEXT: ptrue p0.d
239 ; CHECK-NEXT: sxth z0.d, p0/m, z0.d
240 ; CHECK-NEXT: sxth z1.d, p0/m, z1.d
241 ; CHECK-NEXT: add z0.d, z0.d, z1.d
242 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
243 ; CHECK-NEXT: lsr z0.d, z0.d, #1
246 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
247 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
248 %m = add nsw <vscale x 2 x i32> %s0s, %s1s
249 %s = lshr <vscale x 2 x i32> %m, splat (i32 1)
250 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
251 ret <vscale x 2 x i16> %s2
; Unsigned halving add on unpacked nxv2i16. SVE masks to 16 bits then
; add+lsr; SVE2 masks both operands then selects UHADD.
254 define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
255 ; SVE-LABEL: haddu_v2i16:
256 ; SVE: // %bb.0: // %entry
257 ; SVE-NEXT: and z1.d, z1.d, #0xffff
258 ; SVE-NEXT: and z0.d, z0.d, #0xffff
259 ; SVE-NEXT: add z0.d, z0.d, z1.d
260 ; SVE-NEXT: lsr z0.d, z0.d, #1
263 ; SVE2-LABEL: haddu_v2i16:
264 ; SVE2: // %bb.0: // %entry
265 ; SVE2-NEXT: and z1.d, z1.d, #0xffff
266 ; SVE2-NEXT: and z0.d, z0.d, #0xffff
267 ; SVE2-NEXT: ptrue p0.d
268 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
271 %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
272 %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
273 %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
274 %s = lshr <vscale x 2 x i32> %m, splat (i32 1)
275 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
276 ret <vscale x 2 x i16> %s2
; Signed halving add on unpacked nxv4i16 (i16 lanes in .s elements).
; Checks: SVE sxth+add+asr; SVE2 sign-extends then SHADD.
279 define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
280 ; SVE-LABEL: hadds_v4i16:
281 ; SVE: // %bb.0: // %entry
282 ; SVE-NEXT: ptrue p0.s
283 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
284 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
285 ; SVE-NEXT: add z0.s, z0.s, z1.s
286 ; SVE-NEXT: asr z0.s, z0.s, #1
289 ; SVE2-LABEL: hadds_v4i16:
290 ; SVE2: // %bb.0: // %entry
291 ; SVE2-NEXT: ptrue p0.s
292 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
293 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
294 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
297 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
298 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
299 %m = add nsw <vscale x 4 x i32> %s0s, %s1s
300 %s = ashr <vscale x 4 x i32> %m, splat (i32 1)
301 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
302 ret <vscale x 4 x i16> %s2
; lshr variant on unpacked nxv4i16: no hadd formed (common CHECK) —
; sxth + add + lsr on .s elements.
305 define <vscale x 4 x i16> @hadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
306 ; CHECK-LABEL: hadds_v4i16_lsh:
307 ; CHECK: // %bb.0: // %entry
308 ; CHECK-NEXT: ptrue p0.s
309 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
310 ; CHECK-NEXT: sxth z1.s, p0/m, z1.s
311 ; CHECK-NEXT: add z0.s, z0.s, z1.s
312 ; CHECK-NEXT: lsr z0.s, z0.s, #1
315 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
316 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
317 %m = add nsw <vscale x 4 x i32> %s0s, %s1s
318 %s = lshr <vscale x 4 x i32> %m, splat (i32 1)
319 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
320 ret <vscale x 4 x i16> %s2
; Unsigned halving add on unpacked nxv4i16. SVE masks then add+lsr;
; SVE2 masks both operands then selects UHADD.
323 define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
324 ; SVE-LABEL: haddu_v4i16:
325 ; SVE: // %bb.0: // %entry
326 ; SVE-NEXT: and z1.s, z1.s, #0xffff
327 ; SVE-NEXT: and z0.s, z0.s, #0xffff
328 ; SVE-NEXT: add z0.s, z0.s, z1.s
329 ; SVE-NEXT: lsr z0.s, z0.s, #1
332 ; SVE2-LABEL: haddu_v4i16:
333 ; SVE2: // %bb.0: // %entry
334 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
335 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
336 ; SVE2-NEXT: ptrue p0.s
337 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
340 %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
341 %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
342 %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
343 %s = lshr <vscale x 4 x i32> %m, splat (i32 1)
344 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
345 ret <vscale x 4 x i16> %s2
; Signed halving add, packed nxv8i16. SVE expands with eor/and/asr/add;
; SVE2 selects SHADD on .h elements.
348 define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
349 ; SVE-LABEL: hadds_v8i16:
350 ; SVE: // %bb.0: // %entry
351 ; SVE-NEXT: eor z2.d, z0.d, z1.d
352 ; SVE-NEXT: and z0.d, z0.d, z1.d
353 ; SVE-NEXT: asr z1.h, z2.h, #1
354 ; SVE-NEXT: add z0.h, z0.h, z1.h
357 ; SVE2-LABEL: hadds_v8i16:
358 ; SVE2: // %bb.0: // %entry
359 ; SVE2-NEXT: ptrue p0.h
360 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
363 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
364 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
365 %m = add nsw <vscale x 8 x i32> %s0s, %s1s
366 %s = ashr <vscale x 8 x i32> %m, splat (i32 1)
367 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
368 ret <vscale x 8 x i16> %s2
; lshr variant of hadds_v8i16; checks show identical codegen to the ashr
; form on both targets.
371 define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
372 ; SVE-LABEL: hadds_v8i16_lsh:
373 ; SVE: // %bb.0: // %entry
374 ; SVE-NEXT: eor z2.d, z0.d, z1.d
375 ; SVE-NEXT: and z0.d, z0.d, z1.d
376 ; SVE-NEXT: asr z1.h, z2.h, #1
377 ; SVE-NEXT: add z0.h, z0.h, z1.h
380 ; SVE2-LABEL: hadds_v8i16_lsh:
381 ; SVE2: // %bb.0: // %entry
382 ; SVE2-NEXT: ptrue p0.h
383 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
386 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
387 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
388 %m = add nsw <vscale x 8 x i32> %s0s, %s1s
389 %s = lshr <vscale x 8 x i32> %m, splat (i32 1)
390 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
391 ret <vscale x 8 x i16> %s2
; Unsigned halving add, packed nxv8i16. SVE expands with eor/and/lsr/add;
; SVE2 selects UHADD on .h elements.
394 define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
395 ; SVE-LABEL: haddu_v8i16:
396 ; SVE: // %bb.0: // %entry
397 ; SVE-NEXT: eor z2.d, z0.d, z1.d
398 ; SVE-NEXT: and z0.d, z0.d, z1.d
399 ; SVE-NEXT: lsr z1.h, z2.h, #1
400 ; SVE-NEXT: add z0.h, z0.h, z1.h
403 ; SVE2-LABEL: haddu_v8i16:
404 ; SVE2: // %bb.0: // %entry
405 ; SVE2-NEXT: ptrue p0.h
406 ; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
409 %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
410 %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
411 %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
412 %s = lshr <vscale x 8 x i32> %m, splat (i32 1)
413 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
414 ret <vscale x 8 x i16> %s2
; Signed halving add on unpacked nxv4i8 (i8 lanes in .s elements).
; Checks: SVE sxtb+add+asr; SVE2 sign-extends then SHADD.
417 define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
418 ; SVE-LABEL: hadds_v4i8:
419 ; SVE: // %bb.0: // %entry
420 ; SVE-NEXT: ptrue p0.s
421 ; SVE-NEXT: sxtb z1.s, p0/m, z1.s
422 ; SVE-NEXT: sxtb z0.s, p0/m, z0.s
423 ; SVE-NEXT: add z0.s, z0.s, z1.s
424 ; SVE-NEXT: asr z0.s, z0.s, #1
427 ; SVE2-LABEL: hadds_v4i8:
428 ; SVE2: // %bb.0: // %entry
429 ; SVE2-NEXT: ptrue p0.s
430 ; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
431 ; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
432 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
435 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
436 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
437 %m = add nsw <vscale x 4 x i16> %s0s, %s1s
438 %s = ashr <vscale x 4 x i16> %m, splat (i16 1)
439 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
440 ret <vscale x 4 x i8> %s2
; lshr variant on unpacked nxv4i8: no hadd formed (common CHECK). The
; and #0xffff keeps only the i16-wide sum before the lane-wide lsr.
443 define <vscale x 4 x i8> @hadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
444 ; CHECK-LABEL: hadds_v4i8_lsh:
445 ; CHECK: // %bb.0: // %entry
446 ; CHECK-NEXT: ptrue p0.s
447 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
448 ; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
449 ; CHECK-NEXT: add z0.s, z0.s, z1.s
450 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
451 ; CHECK-NEXT: lsr z0.s, z0.s, #1
454 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
455 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
456 %m = add nsw <vscale x 4 x i16> %s0s, %s1s
457 %s = lshr <vscale x 4 x i16> %m, splat (i16 1)
458 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
459 ret <vscale x 4 x i8> %s2
; Unsigned halving add on unpacked nxv4i8. SVE masks to 8 bits then
; add+lsr; SVE2 masks both operands then selects UHADD.
462 define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
463 ; SVE-LABEL: haddu_v4i8:
464 ; SVE: // %bb.0: // %entry
465 ; SVE-NEXT: and z1.s, z1.s, #0xff
466 ; SVE-NEXT: and z0.s, z0.s, #0xff
467 ; SVE-NEXT: add z0.s, z0.s, z1.s
468 ; SVE-NEXT: lsr z0.s, z0.s, #1
471 ; SVE2-LABEL: haddu_v4i8:
472 ; SVE2: // %bb.0: // %entry
473 ; SVE2-NEXT: and z1.s, z1.s, #0xff
474 ; SVE2-NEXT: and z0.s, z0.s, #0xff
475 ; SVE2-NEXT: ptrue p0.s
476 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
479 %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
480 %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
481 %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
482 %s = lshr <vscale x 4 x i16> %m, splat (i16 1)
483 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
484 ret <vscale x 4 x i8> %s2
; Signed halving add on unpacked nxv8i8 (i8 lanes in .h elements).
; Checks: SVE sxtb+add+asr; SVE2 sign-extends then SHADD.
487 define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
488 ; SVE-LABEL: hadds_v8i8:
489 ; SVE: // %bb.0: // %entry
490 ; SVE-NEXT: ptrue p0.h
491 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
492 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
493 ; SVE-NEXT: add z0.h, z0.h, z1.h
494 ; SVE-NEXT: asr z0.h, z0.h, #1
497 ; SVE2-LABEL: hadds_v8i8:
498 ; SVE2: // %bb.0: // %entry
499 ; SVE2-NEXT: ptrue p0.h
500 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
501 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
502 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
505 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
506 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
507 %m = add nsw <vscale x 8 x i16> %s0s, %s1s
508 %s = ashr <vscale x 8 x i16> %m, splat (i16 1)
509 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
510 ret <vscale x 8 x i8> %s2
; lshr variant on unpacked nxv8i8: no hadd formed (common CHECK) —
; sxtb + add + lsr on .h elements.
513 define <vscale x 8 x i8> @hadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
514 ; CHECK-LABEL: hadds_v8i8_lsh:
515 ; CHECK: // %bb.0: // %entry
516 ; CHECK-NEXT: ptrue p0.h
517 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
518 ; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
519 ; CHECK-NEXT: add z0.h, z0.h, z1.h
520 ; CHECK-NEXT: lsr z0.h, z0.h, #1
523 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
524 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
525 %m = add nsw <vscale x 8 x i16> %s0s, %s1s
526 %s = lshr <vscale x 8 x i16> %m, splat (i16 1)
527 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
528 ret <vscale x 8 x i8> %s2
; Unsigned halving add on unpacked nxv8i8. SVE masks then add+lsr;
; SVE2 masks both operands then selects UHADD.
531 define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
532 ; SVE-LABEL: haddu_v8i8:
533 ; SVE: // %bb.0: // %entry
534 ; SVE-NEXT: and z1.h, z1.h, #0xff
535 ; SVE-NEXT: and z0.h, z0.h, #0xff
536 ; SVE-NEXT: add z0.h, z0.h, z1.h
537 ; SVE-NEXT: lsr z0.h, z0.h, #1
540 ; SVE2-LABEL: haddu_v8i8:
541 ; SVE2: // %bb.0: // %entry
542 ; SVE2-NEXT: and z1.h, z1.h, #0xff
543 ; SVE2-NEXT: and z0.h, z0.h, #0xff
544 ; SVE2-NEXT: ptrue p0.h
545 ; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
548 %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
549 %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
550 %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
551 %s = lshr <vscale x 8 x i16> %m, splat (i16 1)
552 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
553 ret <vscale x 8 x i8> %s2
; Signed halving add, packed nxv16i8. SVE expands with eor/and/asr/add;
; SVE2 selects SHADD on .b elements.
556 define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
557 ; SVE-LABEL: hadds_v16i8:
558 ; SVE: // %bb.0: // %entry
559 ; SVE-NEXT: eor z2.d, z0.d, z1.d
560 ; SVE-NEXT: and z0.d, z0.d, z1.d
561 ; SVE-NEXT: asr z1.b, z2.b, #1
562 ; SVE-NEXT: add z0.b, z0.b, z1.b
565 ; SVE2-LABEL: hadds_v16i8:
566 ; SVE2: // %bb.0: // %entry
567 ; SVE2-NEXT: ptrue p0.b
568 ; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
571 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
572 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
573 %m = add nsw <vscale x 16 x i16> %s0s, %s1s
574 %s = ashr <vscale x 16 x i16> %m, splat (i16 1)
575 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
576 ret <vscale x 16 x i8> %s2
; lshr variant of hadds_v16i8; checks show identical codegen to the ashr
; form on both targets.
579 define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
580 ; SVE-LABEL: hadds_v16i8_lsh:
581 ; SVE: // %bb.0: // %entry
582 ; SVE-NEXT: eor z2.d, z0.d, z1.d
583 ; SVE-NEXT: and z0.d, z0.d, z1.d
584 ; SVE-NEXT: asr z1.b, z2.b, #1
585 ; SVE-NEXT: add z0.b, z0.b, z1.b
588 ; SVE2-LABEL: hadds_v16i8_lsh:
589 ; SVE2: // %bb.0: // %entry
590 ; SVE2-NEXT: ptrue p0.b
591 ; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
594 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
595 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
596 %m = add nsw <vscale x 16 x i16> %s0s, %s1s
597 %s = lshr <vscale x 16 x i16> %m, splat (i16 1)
598 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
599 ret <vscale x 16 x i8> %s2
; Unsigned halving add, packed nxv16i8. SVE expands with eor/and/lsr/add;
; SVE2 selects UHADD on .b elements.
602 define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
603 ; SVE-LABEL: haddu_v16i8:
604 ; SVE: // %bb.0: // %entry
605 ; SVE-NEXT: eor z2.d, z0.d, z1.d
606 ; SVE-NEXT: and z0.d, z0.d, z1.d
607 ; SVE-NEXT: lsr z1.b, z2.b, #1
608 ; SVE-NEXT: add z0.b, z0.b, z1.b
611 ; SVE2-LABEL: haddu_v16i8:
612 ; SVE2: // %bb.0: // %entry
613 ; SVE2-NEXT: ptrue p0.b
614 ; SVE2-NEXT: uhadd z0.b, p0/m, z0.b, z1.b
617 %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
618 %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
619 %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
620 %s = lshr <vscale x 16 x i16> %m, splat (i16 1)
621 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
622 ret <vscale x 16 x i8> %s2
; Signed ROUNDING halving add, nxv2i64: trunc((sext a + 1 + sext b) ashr 1).
; SVE expands with eor/orr/asr/sub; SVE2 selects SRHADD.
625 define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
626 ; SVE-LABEL: rhadds_v2i64:
627 ; SVE: // %bb.0: // %entry
628 ; SVE-NEXT: eor z2.d, z0.d, z1.d
629 ; SVE-NEXT: orr z0.d, z0.d, z1.d
630 ; SVE-NEXT: asr z1.d, z2.d, #1
631 ; SVE-NEXT: sub z0.d, z0.d, z1.d
634 ; SVE2-LABEL: rhadds_v2i64:
635 ; SVE2: // %bb.0: // %entry
636 ; SVE2-NEXT: ptrue p0.d
637 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
640 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
641 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
642 %add = add <vscale x 2 x i128> %s0s, splat (i128 1)
643 %add2 = add <vscale x 2 x i128> %add, %s1s
644 %s = ashr <vscale x 2 x i128> %add2, splat (i128 1)
645 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
646 ret <vscale x 2 x i64> %result
; lshr variant of rhadds_v2i64; checks show identical codegen to the ashr
; form on both targets.
649 define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
650 ; SVE-LABEL: rhadds_v2i64_lsh:
651 ; SVE: // %bb.0: // %entry
652 ; SVE-NEXT: eor z2.d, z0.d, z1.d
653 ; SVE-NEXT: orr z0.d, z0.d, z1.d
654 ; SVE-NEXT: asr z1.d, z2.d, #1
655 ; SVE-NEXT: sub z0.d, z0.d, z1.d
658 ; SVE2-LABEL: rhadds_v2i64_lsh:
659 ; SVE2: // %bb.0: // %entry
660 ; SVE2-NEXT: ptrue p0.d
661 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
664 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
665 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
666 %add = add <vscale x 2 x i128> %s0s, splat (i128 1)
667 %add2 = add <vscale x 2 x i128> %add, %s1s
668 %s = lshr <vscale x 2 x i128> %add2, splat (i128 1)
669 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
670 ret <vscale x 2 x i64> %result
; Unsigned rounding halving add, nxv2i64. SVE expands with eor/orr/lsr/sub;
; SVE2 selects URHADD.
673 define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
674 ; SVE-LABEL: rhaddu_v2i64:
675 ; SVE: // %bb.0: // %entry
676 ; SVE-NEXT: eor z2.d, z0.d, z1.d
677 ; SVE-NEXT: orr z0.d, z0.d, z1.d
678 ; SVE-NEXT: lsr z1.d, z2.d, #1
679 ; SVE-NEXT: sub z0.d, z0.d, z1.d
682 ; SVE2-LABEL: rhaddu_v2i64:
683 ; SVE2: // %bb.0: // %entry
684 ; SVE2-NEXT: ptrue p0.d
685 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
688 %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
689 %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
690 %add = add nuw nsw <vscale x 2 x i128> %s0s, splat (i128 1)
691 %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
692 %s = lshr <vscale x 2 x i128> %add2, splat (i128 1)
693 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
694 ret <vscale x 2 x i64> %result
; Signed rounding halving add on unpacked nxv2i32. SVE sign-extends then
; uses the eor-with-(-1)/sub/asr expansion; SVE2 sign-extends then SRHADD.
697 define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
698 ; SVE-LABEL: rhadds_v2i32:
699 ; SVE: // %bb.0: // %entry
700 ; SVE-NEXT: ptrue p0.d
701 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
702 ; SVE-NEXT: sxtw z0.d, p0/m, z0.d
703 ; SVE-NEXT: sxtw z1.d, p0/m, z1.d
704 ; SVE-NEXT: eor z0.d, z0.d, z2.d
705 ; SVE-NEXT: sub z0.d, z1.d, z0.d
706 ; SVE-NEXT: asr z0.d, z0.d, #1
709 ; SVE2-LABEL: rhadds_v2i32:
710 ; SVE2: // %bb.0: // %entry
711 ; SVE2-NEXT: ptrue p0.d
712 ; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
713 ; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
714 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
717 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
718 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
719 %add = add <vscale x 2 x i64> %s0s, splat (i64 1)
720 %add2 = add <vscale x 2 x i64> %add, %s1s
721 %s = ashr <vscale x 2 x i64> %add2, splat (i64 1)
722 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
723 ret <vscale x 2 x i32> %result
; lshr variant on unpacked nxv2i32: no rhadd formed on either target
; (common CHECK) — sxtw + not-via-eor + sub + lsr.
726 define <vscale x 2 x i32> @rhadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
727 ; CHECK-LABEL: rhadds_v2i32_lsh:
728 ; CHECK: // %bb.0: // %entry
729 ; CHECK-NEXT: ptrue p0.d
730 ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
731 ; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
732 ; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
733 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
734 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
735 ; CHECK-NEXT: lsr z0.d, z0.d, #1
738 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
739 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
740 %add = add <vscale x 2 x i64> %s0s, splat (i64 1)
741 %add2 = add <vscale x 2 x i64> %add, %s1s
742 %s = lshr <vscale x 2 x i64> %add2, splat (i64 1)
743 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
744 ret <vscale x 2 x i32> %result
; Unsigned rounding halving add on unpacked nxv2i32. SVE masks then uses the
; eor-with-(-1)/sub/lsr expansion; SVE2 masks both operands then URHADD.
747 define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
748 ; SVE-LABEL: rhaddu_v2i32:
749 ; SVE: // %bb.0: // %entry
750 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
751 ; SVE-NEXT: and z0.d, z0.d, #0xffffffff
752 ; SVE-NEXT: and z1.d, z1.d, #0xffffffff
753 ; SVE-NEXT: eor z0.d, z0.d, z2.d
754 ; SVE-NEXT: sub z0.d, z1.d, z0.d
755 ; SVE-NEXT: lsr z0.d, z0.d, #1
758 ; SVE2-LABEL: rhaddu_v2i32:
759 ; SVE2: // %bb.0: // %entry
760 ; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
761 ; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
762 ; SVE2-NEXT: ptrue p0.d
763 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
766 %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
767 %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
768 %add = add nuw nsw <vscale x 2 x i64> %s0s, splat (i64 1)
769 %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
770 %s = lshr <vscale x 2 x i64> %add2, splat (i64 1)
771 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
772 ret <vscale x 2 x i32> %result
; Signed rounding halving add, packed nxv4i32. SVE expands with
; eor/orr/asr/sub; SVE2 selects SRHADD on .s elements.
775 define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
776 ; SVE-LABEL: rhadds_v4i32:
777 ; SVE: // %bb.0: // %entry
778 ; SVE-NEXT: eor z2.d, z0.d, z1.d
779 ; SVE-NEXT: orr z0.d, z0.d, z1.d
780 ; SVE-NEXT: asr z1.s, z2.s, #1
781 ; SVE-NEXT: sub z0.s, z0.s, z1.s
784 ; SVE2-LABEL: rhadds_v4i32:
785 ; SVE2: // %bb.0: // %entry
786 ; SVE2-NEXT: ptrue p0.s
787 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
790 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
791 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
792 %add = add <vscale x 4 x i64> %s0s, splat (i64 1)
793 %add2 = add <vscale x 4 x i64> %add, %s1s
794 %s = ashr <vscale x 4 x i64> %add2, splat (i64 1)
795 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
796 ret <vscale x 4 x i32> %result
; lshr variant of rhadds_v4i32; checks show identical codegen to the ashr
; form on both targets.
799 define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
800 ; SVE-LABEL: rhadds_v4i32_lsh:
801 ; SVE: // %bb.0: // %entry
802 ; SVE-NEXT: eor z2.d, z0.d, z1.d
803 ; SVE-NEXT: orr z0.d, z0.d, z1.d
804 ; SVE-NEXT: asr z1.s, z2.s, #1
805 ; SVE-NEXT: sub z0.s, z0.s, z1.s
808 ; SVE2-LABEL: rhadds_v4i32_lsh:
809 ; SVE2: // %bb.0: // %entry
810 ; SVE2-NEXT: ptrue p0.s
811 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
814 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
815 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
816 %add = add <vscale x 4 x i64> %s0s, splat (i64 1)
817 %add2 = add <vscale x 4 x i64> %add, %s1s
818 %s = lshr <vscale x 4 x i64> %add2, splat (i64 1)
819 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
820 ret <vscale x 4 x i32> %result
; Unsigned rounding halving add, packed nxv4i32. SVE expands with
; eor/orr/lsr/sub; SVE2 selects URHADD on .s elements.
823 define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
824 ; SVE-LABEL: rhaddu_v4i32:
825 ; SVE: // %bb.0: // %entry
826 ; SVE-NEXT: eor z2.d, z0.d, z1.d
827 ; SVE-NEXT: orr z0.d, z0.d, z1.d
828 ; SVE-NEXT: lsr z1.s, z2.s, #1
829 ; SVE-NEXT: sub z0.s, z0.s, z1.s
832 ; SVE2-LABEL: rhaddu_v4i32:
833 ; SVE2: // %bb.0: // %entry
834 ; SVE2-NEXT: ptrue p0.s
835 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
838 %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
839 %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
840 %add = add nuw nsw <vscale x 4 x i64> %s0s, splat (i64 1)
841 %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
842 %s = lshr <vscale x 4 x i64> %add2, splat (i64 1)
843 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
844 ret <vscale x 4 x i32> %result
; Signed rounding halving add on unpacked nxv2i16. SVE sign-extends then
; uses the eor-with-(-1)/sub/asr expansion; SVE2 sign-extends then SRHADD.
847 define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
848 ; SVE-LABEL: rhadds_v2i16:
849 ; SVE: // %bb.0: // %entry
850 ; SVE-NEXT: ptrue p0.d
851 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
852 ; SVE-NEXT: sxth z0.d, p0/m, z0.d
853 ; SVE-NEXT: sxth z1.d, p0/m, z1.d
854 ; SVE-NEXT: eor z0.d, z0.d, z2.d
855 ; SVE-NEXT: sub z0.d, z1.d, z0.d
856 ; SVE-NEXT: asr z0.d, z0.d, #1
859 ; SVE2-LABEL: rhadds_v2i16:
860 ; SVE2: // %bb.0: // %entry
861 ; SVE2-NEXT: ptrue p0.d
862 ; SVE2-NEXT: sxth z1.d, p0/m, z1.d
863 ; SVE2-NEXT: sxth z0.d, p0/m, z0.d
864 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
867 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
868 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
869 %add = add <vscale x 2 x i32> %s0s, splat (i32 1)
870 %add2 = add <vscale x 2 x i32> %add, %s1s
871 %s = ashr <vscale x 2 x i32> %add2, splat (i32 1)
872 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
873 ret <vscale x 2 x i16> %result
; lshr variant on unpacked nxv2i16: no rhadd formed (common CHECK). The
; and #0xffffffff keeps only the i32-wide sum before the lane-wide lsr.
876 define <vscale x 2 x i16> @rhadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
877 ; CHECK-LABEL: rhadds_v2i16_lsh:
878 ; CHECK: // %bb.0: // %entry
879 ; CHECK-NEXT: ptrue p0.d
880 ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
881 ; CHECK-NEXT: sxth z0.d, p0/m, z0.d
882 ; CHECK-NEXT: sxth z1.d, p0/m, z1.d
883 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
884 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
885 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
886 ; CHECK-NEXT: lsr z0.d, z0.d, #1
889 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
890 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
891 %add = add <vscale x 2 x i32> %s0s, splat (i32 1)
892 %add2 = add <vscale x 2 x i32> %add, %s1s
893 %s = lshr <vscale x 2 x i32> %add2, splat (i32 1)
894 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
895 ret <vscale x 2 x i16> %result
; Unsigned rounding halving add on unpacked nxv2i16. SVE masks then uses the
; eor-with-(-1)/sub/lsr expansion; SVE2 masks both operands then URHADD.
898 define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
899 ; SVE-LABEL: rhaddu_v2i16:
900 ; SVE: // %bb.0: // %entry
901 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
902 ; SVE-NEXT: and z0.d, z0.d, #0xffff
903 ; SVE-NEXT: and z1.d, z1.d, #0xffff
904 ; SVE-NEXT: eor z0.d, z0.d, z2.d
905 ; SVE-NEXT: sub z0.d, z1.d, z0.d
906 ; SVE-NEXT: lsr z0.d, z0.d, #1
909 ; SVE2-LABEL: rhaddu_v2i16:
910 ; SVE2: // %bb.0: // %entry
911 ; SVE2-NEXT: and z1.d, z1.d, #0xffff
912 ; SVE2-NEXT: and z0.d, z0.d, #0xffff
913 ; SVE2-NEXT: ptrue p0.d
914 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
917 %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
918 %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
919 %add = add nuw nsw <vscale x 2 x i32> %s0s, splat (i32 1)
920 %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
921 %s = lshr <vscale x 2 x i32> %add2, splat (i32 1)
922 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
923 ret <vscale x 2 x i16> %result
; Signed rounding halving add on unpacked nxv4i16. SVE sign-extends then
; uses the eor-with-(-1)/sub/asr expansion; SVE2 sign-extends then SRHADD.
926 define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
927 ; SVE-LABEL: rhadds_v4i16:
928 ; SVE: // %bb.0: // %entry
929 ; SVE-NEXT: ptrue p0.s
930 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
931 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
932 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
933 ; SVE-NEXT: eor z0.d, z0.d, z2.d
934 ; SVE-NEXT: sub z0.s, z1.s, z0.s
935 ; SVE-NEXT: asr z0.s, z0.s, #1
938 ; SVE2-LABEL: rhadds_v4i16:
939 ; SVE2: // %bb.0: // %entry
940 ; SVE2-NEXT: ptrue p0.s
941 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
942 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
943 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
946 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
947 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
948 %add = add <vscale x 4 x i32> %s0s, splat (i32 1)
949 %add2 = add <vscale x 4 x i32> %add, %s1s
950 %s = ashr <vscale x 4 x i32> %add2, splat (i32 1)
951 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
952 ret <vscale x 4 x i16> %result
; As rhadds_v4i16 but the widened sum is shifted with lshr instead of ashr.
; SVE and SVE2 produce identical code here (shared CHECK prefix) and no
; srhadd is formed: the expansion ends in lsr rather than asr.
955 define <vscale x 4 x i16> @rhadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
956 ; CHECK-LABEL: rhadds_v4i16_lsh:
957 ; CHECK: // %bb.0: // %entry
958 ; CHECK-NEXT: ptrue p0.s
959 ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
960 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
961 ; CHECK-NEXT: sxth z1.s, p0/m, z1.s
962 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
963 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
964 ; CHECK-NEXT: lsr z0.s, z0.s, #1
967 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
968 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
969 %add = add <vscale x 4 x i32> %s0s, splat (i32 1)
970 %add2 = add <vscale x 4 x i32> %add, %s1s
971 %s = lshr <vscale x 4 x i32> %add2, splat (i32 1)
972 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
973 ret <vscale x 4 x i16> %result
; Unsigned rounding halving add on <vscale x 4 x i16>.
; SVE zero-extends by masking to #0xffff and computes (s1 - ~s0) >> 1
; (eor/sub/lsr); SVE2 masks and selects urhadd on the .s container elements.
976 define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
977 ; SVE-LABEL: rhaddu_v4i16:
978 ; SVE: // %bb.0: // %entry
979 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
980 ; SVE-NEXT: and z0.s, z0.s, #0xffff
981 ; SVE-NEXT: and z1.s, z1.s, #0xffff
982 ; SVE-NEXT: eor z0.d, z0.d, z2.d
983 ; SVE-NEXT: sub z0.s, z1.s, z0.s
984 ; SVE-NEXT: lsr z0.s, z0.s, #1
987 ; SVE2-LABEL: rhaddu_v4i16:
988 ; SVE2: // %bb.0: // %entry
989 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
990 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
991 ; SVE2-NEXT: ptrue p0.s
992 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
995 %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
996 %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
997 %add = add nuw nsw <vscale x 4 x i32> %s0s, splat (i32 1)
998 %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
999 %s = lshr <vscale x 4 x i32> %add2, splat (i32 1)
1000 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
1001 ret <vscale x 4 x i16> %result
; Signed rounding halving add at the native .h element width (no extend
; needed). SVE uses the identity (s0 | s1) - ((s0 ^ s1) >> 1)
; (eor/orr/asr/sub); SVE2 selects a single srhadd on .h elements.
1004 define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1005 ; SVE-LABEL: rhadds_v8i16:
1006 ; SVE: // %bb.0: // %entry
1007 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1008 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1009 ; SVE-NEXT: asr z1.h, z2.h, #1
1010 ; SVE-NEXT: sub z0.h, z0.h, z1.h
1013 ; SVE2-LABEL: rhadds_v8i16:
1014 ; SVE2: // %bb.0: // %entry
1015 ; SVE2-NEXT: ptrue p0.h
1016 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1019 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1020 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1021 %add = add <vscale x 8 x i32> %s0s, splat (i32 1)
1022 %add2 = add <vscale x 8 x i32> %add, %s1s
1023 %s = ashr <vscale x 8 x i32> %add2, splat (i32 1)
1024 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1025 ret <vscale x 8 x i16> %result
; lshr variant of rhadds_v8i16. Lowering is identical to the ashr version:
; SVE still uses the eor/orr/asr/sub expansion and SVE2 still selects
; srhadd on .h elements.
1028 define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1029 ; SVE-LABEL: rhadds_v8i16_lsh:
1030 ; SVE: // %bb.0: // %entry
1031 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1032 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1033 ; SVE-NEXT: asr z1.h, z2.h, #1
1034 ; SVE-NEXT: sub z0.h, z0.h, z1.h
1037 ; SVE2-LABEL: rhadds_v8i16_lsh:
1038 ; SVE2: // %bb.0: // %entry
1039 ; SVE2-NEXT: ptrue p0.h
1040 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1043 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1044 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1045 %add = add <vscale x 8 x i32> %s0s, splat (i32 1)
1046 %add2 = add <vscale x 8 x i32> %add, %s1s
1047 %s = lshr <vscale x 8 x i32> %add2, splat (i32 1)
1048 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1049 ret <vscale x 8 x i16> %result
; Unsigned rounding halving add at the native .h element width. SVE uses
; (s0 | s1) - ((s0 ^ s1) >> 1) with a logical shift (eor/orr/lsr/sub);
; SVE2 selects a single urhadd on .h elements.
1052 define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1053 ; SVE-LABEL: rhaddu_v8i16:
1054 ; SVE: // %bb.0: // %entry
1055 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1056 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1057 ; SVE-NEXT: lsr z1.h, z2.h, #1
1058 ; SVE-NEXT: sub z0.h, z0.h, z1.h
1061 ; SVE2-LABEL: rhaddu_v8i16:
1062 ; SVE2: // %bb.0: // %entry
1063 ; SVE2-NEXT: ptrue p0.h
1064 ; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
1067 %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1068 %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1069 %add = add nuw nsw <vscale x 8 x i32> %s0s, splat (i32 1)
1070 %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
1071 %s = lshr <vscale x 8 x i32> %add2, splat (i32 1)
1072 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1073 ret <vscale x 8 x i16> %result
; Signed rounding halving add on <vscale x 4 x i8>. SVE sign-extends the
; i8 payload with sxtb and computes (s1 - ~s0) >> 1 (eor/sub/asr); SVE2
; sign-extends and selects srhadd on the .s container elements.
1076 define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1077 ; SVE-LABEL: rhadds_v4i8:
1078 ; SVE: // %bb.0: // %entry
1079 ; SVE-NEXT: ptrue p0.s
1080 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1081 ; SVE-NEXT: sxtb z0.s, p0/m, z0.s
1082 ; SVE-NEXT: sxtb z1.s, p0/m, z1.s
1083 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1084 ; SVE-NEXT: sub z0.s, z1.s, z0.s
1085 ; SVE-NEXT: asr z0.s, z0.s, #1
1088 ; SVE2-LABEL: rhadds_v4i8:
1089 ; SVE2: // %bb.0: // %entry
1090 ; SVE2-NEXT: ptrue p0.s
1091 ; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
1092 ; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
1093 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
1096 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1097 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1098 %add = add <vscale x 4 x i16> %s0s, splat (i16 1)
1099 %add2 = add <vscale x 4 x i16> %add, %s1s
1100 %s = ashr <vscale x 4 x i16> %add2, splat (i16 1)
1101 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1102 ret <vscale x 4 x i8> %result
; As rhadds_v4i8 but with lshr of the i16-widened sum. SVE and SVE2 emit
; identical code (shared CHECK prefix): no srhadd is formed, and an extra
; and #0xffff masks the sum to i16 before the logical shift.
1105 define <vscale x 4 x i8> @rhadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1106 ; CHECK-LABEL: rhadds_v4i8_lsh:
1107 ; CHECK: // %bb.0: // %entry
1108 ; CHECK-NEXT: ptrue p0.s
1109 ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1110 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
1111 ; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
1112 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
1113 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
1114 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
1115 ; CHECK-NEXT: lsr z0.s, z0.s, #1
1118 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1119 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1120 %add = add <vscale x 4 x i16> %s0s, splat (i16 1)
1121 %add2 = add <vscale x 4 x i16> %add, %s1s
1122 %s = lshr <vscale x 4 x i16> %add2, splat (i16 1)
1123 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1124 ret <vscale x 4 x i8> %result
; Unsigned rounding halving add on <vscale x 4 x i8>. SVE zero-extends by
; masking to #0xff and computes (s1 - ~s0) >> 1 (eor/sub/lsr); SVE2 masks
; and selects urhadd on the .s container elements.
1127 define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1128 ; SVE-LABEL: rhaddu_v4i8:
1129 ; SVE: // %bb.0: // %entry
1130 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1131 ; SVE-NEXT: and z0.s, z0.s, #0xff
1132 ; SVE-NEXT: and z1.s, z1.s, #0xff
1133 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1134 ; SVE-NEXT: sub z0.s, z1.s, z0.s
1135 ; SVE-NEXT: lsr z0.s, z0.s, #1
1138 ; SVE2-LABEL: rhaddu_v4i8:
1139 ; SVE2: // %bb.0: // %entry
1140 ; SVE2-NEXT: and z1.s, z1.s, #0xff
1141 ; SVE2-NEXT: and z0.s, z0.s, #0xff
1142 ; SVE2-NEXT: ptrue p0.s
1143 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
1146 %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1147 %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1148 %add = add nuw nsw <vscale x 4 x i16> %s0s, splat (i16 1)
1149 %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
1150 %s = lshr <vscale x 4 x i16> %add2, splat (i16 1)
1151 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1152 ret <vscale x 4 x i8> %result
; Signed rounding halving add on <vscale x 8 x i8> held in .h containers.
; SVE sign-extends with sxtb and computes (s1 - ~s0) >> 1 (eor/sub/asr);
; SVE2 sign-extends and selects srhadd on .h elements.
1155 define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1156 ; SVE-LABEL: rhadds_v8i8:
1157 ; SVE: // %bb.0: // %entry
1158 ; SVE-NEXT: ptrue p0.h
1159 ; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1160 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
1161 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
1162 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1163 ; SVE-NEXT: sub z0.h, z1.h, z0.h
1164 ; SVE-NEXT: asr z0.h, z0.h, #1
1167 ; SVE2-LABEL: rhadds_v8i8:
1168 ; SVE2: // %bb.0: // %entry
1169 ; SVE2-NEXT: ptrue p0.h
1170 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
1171 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
1172 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1175 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1176 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1177 %add = add <vscale x 8 x i16> %s0s, splat (i16 1)
1178 %add2 = add <vscale x 8 x i16> %add, %s1s
1179 %s = ashr <vscale x 8 x i16> %add2, splat (i16 1)
1180 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1181 ret <vscale x 8 x i8> %result
; As rhadds_v8i8 but with lshr of the i16-widened sum. SVE and SVE2 emit
; identical code (shared CHECK prefix) and no srhadd is formed — the
; expansion ends in lsr instead of asr.
1184 define <vscale x 8 x i8> @rhadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1185 ; CHECK-LABEL: rhadds_v8i8_lsh:
1186 ; CHECK: // %bb.0: // %entry
1187 ; CHECK-NEXT: ptrue p0.h
1188 ; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1189 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
1190 ; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
1191 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
1192 ; CHECK-NEXT: sub z0.h, z1.h, z0.h
1193 ; CHECK-NEXT: lsr z0.h, z0.h, #1
1196 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1197 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1198 %add = add <vscale x 8 x i16> %s0s, splat (i16 1)
1199 %add2 = add <vscale x 8 x i16> %add, %s1s
1200 %s = lshr <vscale x 8 x i16> %add2, splat (i16 1)
1201 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1202 ret <vscale x 8 x i8> %result
; Unsigned rounding halving add on <vscale x 8 x i8> in .h containers.
; SVE zero-extends by masking to #0xff and computes (s1 - ~s0) >> 1
; (eor/sub/lsr); SVE2 masks and selects urhadd on .h elements.
1205 define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1206 ; SVE-LABEL: rhaddu_v8i8:
1207 ; SVE: // %bb.0: // %entry
1208 ; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1209 ; SVE-NEXT: and z0.h, z0.h, #0xff
1210 ; SVE-NEXT: and z1.h, z1.h, #0xff
1211 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1212 ; SVE-NEXT: sub z0.h, z1.h, z0.h
1213 ; SVE-NEXT: lsr z0.h, z0.h, #1
1216 ; SVE2-LABEL: rhaddu_v8i8:
1217 ; SVE2: // %bb.0: // %entry
1218 ; SVE2-NEXT: and z1.h, z1.h, #0xff
1219 ; SVE2-NEXT: and z0.h, z0.h, #0xff
1220 ; SVE2-NEXT: ptrue p0.h
1221 ; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
1224 %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1225 %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1226 %add = add nuw nsw <vscale x 8 x i16> %s0s, splat (i16 1)
1227 %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
1228 %s = lshr <vscale x 8 x i16> %add2, splat (i16 1)
1229 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1230 ret <vscale x 8 x i8> %result
; Signed rounding halving add at the native .b element width. SVE uses the
; identity (s0 | s1) - ((s0 ^ s1) >> 1) (eor/orr/asr/sub); SVE2 selects a
; single srhadd on .b elements.
1233 define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1234 ; SVE-LABEL: rhadds_v16i8:
1235 ; SVE: // %bb.0: // %entry
1236 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1237 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1238 ; SVE-NEXT: asr z1.b, z2.b, #1
1239 ; SVE-NEXT: sub z0.b, z0.b, z1.b
1242 ; SVE2-LABEL: rhadds_v16i8:
1243 ; SVE2: // %bb.0: // %entry
1244 ; SVE2-NEXT: ptrue p0.b
1245 ; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
1248 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1249 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1250 %add = add <vscale x 16 x i16> %s0s, splat (i16 1)
1251 %add2 = add <vscale x 16 x i16> %add, %s1s
1252 %s = ashr <vscale x 16 x i16> %add2, splat (i16 1)
1253 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1254 ret <vscale x 16 x i8> %result
; lshr variant of rhadds_v16i8. Lowering is identical to the ashr version:
; SVE still uses the eor/orr/asr/sub expansion and SVE2 still selects
; srhadd on .b elements.
1257 define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1258 ; SVE-LABEL: rhadds_v16i8_lsh:
1259 ; SVE: // %bb.0: // %entry
1260 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1261 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1262 ; SVE-NEXT: asr z1.b, z2.b, #1
1263 ; SVE-NEXT: sub z0.b, z0.b, z1.b
1266 ; SVE2-LABEL: rhadds_v16i8_lsh:
1267 ; SVE2: // %bb.0: // %entry
1268 ; SVE2-NEXT: ptrue p0.b
1269 ; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
1272 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1273 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1274 %add = add <vscale x 16 x i16> %s0s, splat (i16 1)
1275 %add2 = add <vscale x 16 x i16> %add, %s1s
1276 %s = lshr <vscale x 16 x i16> %add2, splat (i16 1)
1277 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1278 ret <vscale x 16 x i8> %result
; Unsigned rounding halving add at the native .b element width. SVE uses
; (s0 | s1) - ((s0 ^ s1) >> 1) with a logical shift (eor/orr/lsr/sub);
; SVE2 selects a single urhadd on .b elements.
1281 define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1282 ; SVE-LABEL: rhaddu_v16i8:
1283 ; SVE: // %bb.0: // %entry
1284 ; SVE-NEXT: eor z2.d, z0.d, z1.d
1285 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1286 ; SVE-NEXT: lsr z1.b, z2.b, #1
1287 ; SVE-NEXT: sub z0.b, z0.b, z1.b
1290 ; SVE2-LABEL: rhaddu_v16i8:
1291 ; SVE2: // %bb.0: // %entry
1292 ; SVE2-NEXT: ptrue p0.b
1293 ; SVE2-NEXT: urhadd z0.b, p0/m, z0.b, z1.b
1296 %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1297 %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1298 %add = add nuw nsw <vscale x 16 x i16> %s0s, splat (i16 1)
1299 %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
1300 %s = lshr <vscale x 16 x i16> %add2, splat (i16 1)
1301 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1302 ret <vscale x 16 x i8> %result