1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2
; Signed halving add on <vscale x 2 x i64>: both operands are sign-extended to
; i128, added, arithmetically shifted right by one and truncated back to i64.
; Expected code: plain SVE uses an asr/and/add expansion, while SVE2 emits a
; single predicated shadd.
5 define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
6 ; SVE-LABEL: hadds_v2i64:
7 ; SVE: // %bb.0: // %entry
8 ; SVE-NEXT: asr z2.d, z1.d, #1
9 ; SVE-NEXT: asr z3.d, z0.d, #1
10 ; SVE-NEXT: and z0.d, z0.d, z1.d
11 ; SVE-NEXT: add z1.d, z3.d, z2.d
12 ; SVE-NEXT: and z0.d, z0.d, #0x1
13 ; SVE-NEXT: add z0.d, z1.d, z0.d
16 ; SVE2-LABEL: hadds_v2i64:
17 ; SVE2: // %bb.0: // %entry
18 ; SVE2-NEXT: ptrue p0.d
19 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
22 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
23 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
24 %m = add nsw <vscale x 2 x i128> %s0s, %s1s
25 %s = ashr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
26 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
27 ret <vscale x 2 x i64> %s2
; Variant of the signed <vscale x 2 x i64> halving add that shifts the widened
; i128 sum with lshr instead of ashr before truncating to i64.
; Expected code matches the ashr form: asr/and/add expansion on plain SVE and
; a predicated shadd on SVE2.
30 define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
31 ; SVE-LABEL: hadds_v2i64_lsh:
32 ; SVE: // %bb.0: // %entry
33 ; SVE-NEXT: asr z2.d, z1.d, #1
34 ; SVE-NEXT: asr z3.d, z0.d, #1
35 ; SVE-NEXT: and z0.d, z0.d, z1.d
36 ; SVE-NEXT: add z1.d, z3.d, z2.d
37 ; SVE-NEXT: and z0.d, z0.d, #0x1
38 ; SVE-NEXT: add z0.d, z1.d, z0.d
41 ; SVE2-LABEL: hadds_v2i64_lsh:
42 ; SVE2: // %bb.0: // %entry
43 ; SVE2-NEXT: ptrue p0.d
44 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
47 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
48 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
49 %m = add nsw <vscale x 2 x i128> %s0s, %s1s
50 %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
51 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
52 ret <vscale x 2 x i64> %s2
; Unsigned halving add on <vscale x 2 x i64>: both operands are zero-extended
; to i128, added, logically shifted right by one and truncated back to i64.
; Expected code: plain SVE uses an lsr/and/add expansion, while SVE2 emits a
; single predicated uhadd.
55 define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
56 ; SVE-LABEL: haddu_v2i64:
57 ; SVE: // %bb.0: // %entry
58 ; SVE-NEXT: lsr z2.d, z1.d, #1
59 ; SVE-NEXT: lsr z3.d, z0.d, #1
60 ; SVE-NEXT: and z0.d, z0.d, z1.d
61 ; SVE-NEXT: add z1.d, z3.d, z2.d
62 ; SVE-NEXT: and z0.d, z0.d, #0x1
63 ; SVE-NEXT: add z0.d, z1.d, z0.d
66 ; SVE2-LABEL: haddu_v2i64:
67 ; SVE2: // %bb.0: // %entry
68 ; SVE2-NEXT: ptrue p0.d
69 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
72 %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
73 %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
74 %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
75 %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
76 %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
77 ret <vscale x 2 x i64> %s2
; Signed halving add on <vscale x 2 x i32>: sext to i64, add, ashr by one,
; trunc back to i32.
; Expected code: plain SVE sign-extends one operand with sxtw, folds the other
; extension into an adr (sxtw form) that performs the add, then shifts with
; asr; SVE2 sign-extends both operands and emits shadd on .d lanes.
80 define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
81 ; SVE-LABEL: hadds_v2i32:
82 ; SVE: // %bb.0: // %entry
83 ; SVE-NEXT: ptrue p0.d
84 ; SVE-NEXT: sxtw z0.d, p0/m, z0.d
85 ; SVE-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
86 ; SVE-NEXT: asr z0.d, z0.d, #1
89 ; SVE2-LABEL: hadds_v2i32:
90 ; SVE2: // %bb.0: // %entry
91 ; SVE2-NEXT: ptrue p0.d
92 ; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
93 ; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
94 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
97 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
98 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
99 %m = add nsw <vscale x 2 x i64> %s0s, %s1s
100 %s = ashr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
101 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
102 ret <vscale x 2 x i32> %s2
; Signed-extend halving add on <vscale x 2 x i32> that shifts the widened i64
; sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxtw, adr
; with an sxtw extend, then lsr. No shadd is expected here, even with SVE2.
105 define <vscale x 2 x i32> @hadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
106 ; CHECK-LABEL: hadds_v2i32_lsh:
107 ; CHECK: // %bb.0: // %entry
108 ; CHECK-NEXT: ptrue p0.d
109 ; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
110 ; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
111 ; CHECK-NEXT: lsr z0.d, z0.d, #1
114 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
115 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
116 %m = add nsw <vscale x 2 x i64> %s0s, %s1s
117 %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
118 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
119 ret <vscale x 2 x i32> %s2
; Unsigned halving add on <vscale x 2 x i32>: zext to i64, add, lshr by one,
; trunc back to i32.
; Expected code: plain SVE masks one operand to 32 bits, folds the other
; extension into an adr (uxtw form) that performs the add, then shifts with
; lsr; SVE2 masks both operands and emits uhadd on .d lanes.
122 define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
123 ; SVE-LABEL: haddu_v2i32:
124 ; SVE: // %bb.0: // %entry
125 ; SVE-NEXT: and z0.d, z0.d, #0xffffffff
126 ; SVE-NEXT: adr z0.d, [z0.d, z1.d, uxtw]
127 ; SVE-NEXT: lsr z0.d, z0.d, #1
130 ; SVE2-LABEL: haddu_v2i32:
131 ; SVE2: // %bb.0: // %entry
132 ; SVE2-NEXT: ptrue p0.d
133 ; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
134 ; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
135 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
138 %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
139 %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
140 %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
141 %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
142 %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
143 ret <vscale x 2 x i32> %s2
; Signed halving add on <vscale x 4 x i32>: sext to i64, add, ashr by one,
; trunc back to i32.
; Expected code: plain SVE uses an asr/and/add expansion on .s lanes, while
; SVE2 emits a single predicated shadd.
146 define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
147 ; SVE-LABEL: hadds_v4i32:
148 ; SVE: // %bb.0: // %entry
149 ; SVE-NEXT: asr z2.s, z1.s, #1
150 ; SVE-NEXT: asr z3.s, z0.s, #1
151 ; SVE-NEXT: and z0.d, z0.d, z1.d
152 ; SVE-NEXT: add z1.s, z3.s, z2.s
153 ; SVE-NEXT: and z0.s, z0.s, #0x1
154 ; SVE-NEXT: add z0.s, z1.s, z0.s
157 ; SVE2-LABEL: hadds_v4i32:
158 ; SVE2: // %bb.0: // %entry
159 ; SVE2-NEXT: ptrue p0.s
160 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
163 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
164 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
165 %m = add nsw <vscale x 4 x i64> %s0s, %s1s
166 %s = ashr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
167 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
168 ret <vscale x 4 x i32> %s2
; Variant of the signed <vscale x 4 x i32> halving add that shifts the widened
; i64 sum with lshr instead of ashr before truncating to i32.
; Expected code matches the ashr form: asr/and/add expansion on plain SVE and
; a predicated shadd on SVE2.
171 define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
172 ; SVE-LABEL: hadds_v4i32_lsh:
173 ; SVE: // %bb.0: // %entry
174 ; SVE-NEXT: asr z2.s, z1.s, #1
175 ; SVE-NEXT: asr z3.s, z0.s, #1
176 ; SVE-NEXT: and z0.d, z0.d, z1.d
177 ; SVE-NEXT: add z1.s, z3.s, z2.s
178 ; SVE-NEXT: and z0.s, z0.s, #0x1
179 ; SVE-NEXT: add z0.s, z1.s, z0.s
182 ; SVE2-LABEL: hadds_v4i32_lsh:
183 ; SVE2: // %bb.0: // %entry
184 ; SVE2-NEXT: ptrue p0.s
185 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
188 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
189 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
190 %m = add nsw <vscale x 4 x i64> %s0s, %s1s
191 %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
192 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
193 ret <vscale x 4 x i32> %s2
; Unsigned halving add on <vscale x 4 x i32>: zext to i64, add, lshr by one,
; trunc back to i32.
; Expected code: plain SVE uses an lsr/and/add expansion on .s lanes, while
; SVE2 emits a single predicated uhadd.
196 define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
197 ; SVE-LABEL: haddu_v4i32:
198 ; SVE: // %bb.0: // %entry
199 ; SVE-NEXT: lsr z2.s, z1.s, #1
200 ; SVE-NEXT: lsr z3.s, z0.s, #1
201 ; SVE-NEXT: and z0.d, z0.d, z1.d
202 ; SVE-NEXT: add z1.s, z3.s, z2.s
203 ; SVE-NEXT: and z0.s, z0.s, #0x1
204 ; SVE-NEXT: add z0.s, z1.s, z0.s
207 ; SVE2-LABEL: haddu_v4i32:
208 ; SVE2: // %bb.0: // %entry
209 ; SVE2-NEXT: ptrue p0.s
210 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
213 %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
214 %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
215 %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
216 %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
217 %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
218 ret <vscale x 4 x i32> %s2
; Signed halving add on <vscale x 2 x i16>: sext to i32, add, ashr by one,
; trunc back to i16.
; Expected code: both targets sign-extend the operands with sxth on .d lanes;
; plain SVE then does add + asr, while SVE2 emits shadd.
221 define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
222 ; SVE-LABEL: hadds_v2i16:
223 ; SVE: // %bb.0: // %entry
224 ; SVE-NEXT: ptrue p0.d
225 ; SVE-NEXT: sxth z0.d, p0/m, z0.d
226 ; SVE-NEXT: sxth z1.d, p0/m, z1.d
227 ; SVE-NEXT: add z0.d, z0.d, z1.d
228 ; SVE-NEXT: asr z0.d, z0.d, #1
231 ; SVE2-LABEL: hadds_v2i16:
232 ; SVE2: // %bb.0: // %entry
233 ; SVE2-NEXT: ptrue p0.d
234 ; SVE2-NEXT: sxth z0.d, p0/m, z0.d
235 ; SVE2-NEXT: sxth z1.d, p0/m, z1.d
236 ; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
239 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
240 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
241 %m = add nsw <vscale x 2 x i32> %s0s, %s1s
242 %s = ashr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
243 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
244 ret <vscale x 2 x i16> %s2
; Signed-extend halving add on <vscale x 2 x i16> that shifts the widened i32
; sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxth on
; both operands, add, a mask to the low 32 bits, then lsr. No shadd is
; expected here, even with SVE2.
247 define <vscale x 2 x i16> @hadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
248 ; CHECK-LABEL: hadds_v2i16_lsh:
249 ; CHECK: // %bb.0: // %entry
250 ; CHECK-NEXT: ptrue p0.d
251 ; CHECK-NEXT: sxth z0.d, p0/m, z0.d
252 ; CHECK-NEXT: sxth z1.d, p0/m, z1.d
253 ; CHECK-NEXT: add z0.d, z0.d, z1.d
254 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
255 ; CHECK-NEXT: lsr z0.d, z0.d, #1
258 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
259 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
260 %m = add nsw <vscale x 2 x i32> %s0s, %s1s
261 %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
262 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
263 ret <vscale x 2 x i16> %s2
; Unsigned halving add on <vscale x 2 x i16>: zext to i32, add, lshr by one,
; trunc back to i16.
; Expected code: both targets mask the operands to 16 bits on .d lanes; plain
; SVE then does add + lsr, while SVE2 emits uhadd.
266 define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
267 ; SVE-LABEL: haddu_v2i16:
268 ; SVE: // %bb.0: // %entry
269 ; SVE-NEXT: and z0.d, z0.d, #0xffff
270 ; SVE-NEXT: and z1.d, z1.d, #0xffff
271 ; SVE-NEXT: add z0.d, z0.d, z1.d
272 ; SVE-NEXT: lsr z0.d, z0.d, #1
275 ; SVE2-LABEL: haddu_v2i16:
276 ; SVE2: // %bb.0: // %entry
277 ; SVE2-NEXT: ptrue p0.d
278 ; SVE2-NEXT: and z0.d, z0.d, #0xffff
279 ; SVE2-NEXT: and z1.d, z1.d, #0xffff
280 ; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
283 %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
284 %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
285 %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
286 %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
287 %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
288 ret <vscale x 2 x i16> %s2
; Signed halving add on <vscale x 4 x i16>: sext to i32, add, ashr by one,
; trunc back to i16.
; Expected code: both targets sign-extend the operands with sxth on .s lanes;
; plain SVE then does add + asr, while SVE2 emits shadd.
291 define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
292 ; SVE-LABEL: hadds_v4i16:
293 ; SVE: // %bb.0: // %entry
294 ; SVE-NEXT: ptrue p0.s
295 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
296 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
297 ; SVE-NEXT: add z0.s, z0.s, z1.s
298 ; SVE-NEXT: asr z0.s, z0.s, #1
301 ; SVE2-LABEL: hadds_v4i16:
302 ; SVE2: // %bb.0: // %entry
303 ; SVE2-NEXT: ptrue p0.s
304 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
305 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
306 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
309 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
310 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
311 %m = add nsw <vscale x 4 x i32> %s0s, %s1s
312 %s = ashr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
313 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
314 ret <vscale x 4 x i16> %s2
; Signed-extend halving add on <vscale x 4 x i16> that shifts the widened i32
; sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxth on
; both operands, add, then lsr on .s lanes. No shadd is expected here, even
; with SVE2.
317 define <vscale x 4 x i16> @hadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
318 ; CHECK-LABEL: hadds_v4i16_lsh:
319 ; CHECK: // %bb.0: // %entry
320 ; CHECK-NEXT: ptrue p0.s
321 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
322 ; CHECK-NEXT: sxth z1.s, p0/m, z1.s
323 ; CHECK-NEXT: add z0.s, z0.s, z1.s
324 ; CHECK-NEXT: lsr z0.s, z0.s, #1
327 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
328 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
329 %m = add nsw <vscale x 4 x i32> %s0s, %s1s
330 %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
331 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
332 ret <vscale x 4 x i16> %s2
; Unsigned halving add on <vscale x 4 x i16>: zext to i32, add, lshr by one,
; trunc back to i16.
; Expected code: both targets mask the operands to 16 bits on .s lanes; plain
; SVE then does add + lsr, while SVE2 emits uhadd.
335 define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
336 ; SVE-LABEL: haddu_v4i16:
337 ; SVE: // %bb.0: // %entry
338 ; SVE-NEXT: and z0.s, z0.s, #0xffff
339 ; SVE-NEXT: and z1.s, z1.s, #0xffff
340 ; SVE-NEXT: add z0.s, z0.s, z1.s
341 ; SVE-NEXT: lsr z0.s, z0.s, #1
344 ; SVE2-LABEL: haddu_v4i16:
345 ; SVE2: // %bb.0: // %entry
346 ; SVE2-NEXT: ptrue p0.s
347 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
348 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
349 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
352 %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
353 %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
354 %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
355 %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
356 %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
357 ret <vscale x 4 x i16> %s2
; Signed halving add on <vscale x 8 x i16>: sext to i32, add, ashr by one,
; trunc back to i16.
; Expected code: plain SVE uses an asr/and/add expansion on .h lanes, while
; SVE2 emits a single predicated shadd.
360 define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
361 ; SVE-LABEL: hadds_v8i16:
362 ; SVE: // %bb.0: // %entry
363 ; SVE-NEXT: asr z2.h, z1.h, #1
364 ; SVE-NEXT: asr z3.h, z0.h, #1
365 ; SVE-NEXT: and z0.d, z0.d, z1.d
366 ; SVE-NEXT: add z1.h, z3.h, z2.h
367 ; SVE-NEXT: and z0.h, z0.h, #0x1
368 ; SVE-NEXT: add z0.h, z1.h, z0.h
371 ; SVE2-LABEL: hadds_v8i16:
372 ; SVE2: // %bb.0: // %entry
373 ; SVE2-NEXT: ptrue p0.h
374 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
377 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
378 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
379 %m = add nsw <vscale x 8 x i32> %s0s, %s1s
380 %s = ashr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
381 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
382 ret <vscale x 8 x i16> %s2
; Variant of the signed <vscale x 8 x i16> halving add that shifts the widened
; i32 sum with lshr instead of ashr before truncating to i16.
; Expected code matches the ashr form: asr/and/add expansion on plain SVE and
; a predicated shadd on SVE2.
385 define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
386 ; SVE-LABEL: hadds_v8i16_lsh:
387 ; SVE: // %bb.0: // %entry
388 ; SVE-NEXT: asr z2.h, z1.h, #1
389 ; SVE-NEXT: asr z3.h, z0.h, #1
390 ; SVE-NEXT: and z0.d, z0.d, z1.d
391 ; SVE-NEXT: add z1.h, z3.h, z2.h
392 ; SVE-NEXT: and z0.h, z0.h, #0x1
393 ; SVE-NEXT: add z0.h, z1.h, z0.h
396 ; SVE2-LABEL: hadds_v8i16_lsh:
397 ; SVE2: // %bb.0: // %entry
398 ; SVE2-NEXT: ptrue p0.h
399 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
402 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
403 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
404 %m = add nsw <vscale x 8 x i32> %s0s, %s1s
405 %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
406 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
407 ret <vscale x 8 x i16> %s2
; Unsigned halving add on <vscale x 8 x i16>: zext to i32, add, lshr by one,
; trunc back to i16.
; Expected code: plain SVE uses an lsr/and/add expansion on .h lanes, while
; SVE2 emits a single predicated uhadd.
410 define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
411 ; SVE-LABEL: haddu_v8i16:
412 ; SVE: // %bb.0: // %entry
413 ; SVE-NEXT: lsr z2.h, z1.h, #1
414 ; SVE-NEXT: lsr z3.h, z0.h, #1
415 ; SVE-NEXT: and z0.d, z0.d, z1.d
416 ; SVE-NEXT: add z1.h, z3.h, z2.h
417 ; SVE-NEXT: and z0.h, z0.h, #0x1
418 ; SVE-NEXT: add z0.h, z1.h, z0.h
421 ; SVE2-LABEL: haddu_v8i16:
422 ; SVE2: // %bb.0: // %entry
423 ; SVE2-NEXT: ptrue p0.h
424 ; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
427 %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
428 %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
429 %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
430 %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
431 %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
432 ret <vscale x 8 x i16> %s2
; Signed halving add on <vscale x 4 x i8>: sext to i16, add, ashr by one,
; trunc back to i8.
; Expected code: both targets sign-extend the operands with sxtb on .s lanes;
; plain SVE then does add + asr, while SVE2 emits shadd.
435 define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
436 ; SVE-LABEL: hadds_v4i8:
437 ; SVE: // %bb.0: // %entry
438 ; SVE-NEXT: ptrue p0.s
439 ; SVE-NEXT: sxtb z0.s, p0/m, z0.s
440 ; SVE-NEXT: sxtb z1.s, p0/m, z1.s
441 ; SVE-NEXT: add z0.s, z0.s, z1.s
442 ; SVE-NEXT: asr z0.s, z0.s, #1
445 ; SVE2-LABEL: hadds_v4i8:
446 ; SVE2: // %bb.0: // %entry
447 ; SVE2-NEXT: ptrue p0.s
448 ; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
449 ; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
450 ; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
453 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
454 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
455 %m = add nsw <vscale x 4 x i16> %s0s, %s1s
456 %s = ashr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
457 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
458 ret <vscale x 4 x i8> %s2
; Signed-extend halving add on <vscale x 4 x i8> that shifts the widened i16
; sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxtb on
; both operands, add, a mask to the low 16 bits, then lsr. No shadd is
; expected here, even with SVE2.
461 define <vscale x 4 x i8> @hadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
462 ; CHECK-LABEL: hadds_v4i8_lsh:
463 ; CHECK: // %bb.0: // %entry
464 ; CHECK-NEXT: ptrue p0.s
465 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
466 ; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
467 ; CHECK-NEXT: add z0.s, z0.s, z1.s
468 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
469 ; CHECK-NEXT: lsr z0.s, z0.s, #1
472 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
473 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
474 %m = add nsw <vscale x 4 x i16> %s0s, %s1s
475 %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
476 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
477 ret <vscale x 4 x i8> %s2
; Unsigned halving add on <vscale x 4 x i8>: zext to i16, add, lshr by one,
; trunc back to i8.
; Expected code: both targets mask the operands to 8 bits on .s lanes; plain
; SVE then does add + lsr, while SVE2 emits uhadd.
480 define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
481 ; SVE-LABEL: haddu_v4i8:
482 ; SVE: // %bb.0: // %entry
483 ; SVE-NEXT: and z0.s, z0.s, #0xff
484 ; SVE-NEXT: and z1.s, z1.s, #0xff
485 ; SVE-NEXT: add z0.s, z0.s, z1.s
486 ; SVE-NEXT: lsr z0.s, z0.s, #1
489 ; SVE2-LABEL: haddu_v4i8:
490 ; SVE2: // %bb.0: // %entry
491 ; SVE2-NEXT: ptrue p0.s
492 ; SVE2-NEXT: and z0.s, z0.s, #0xff
493 ; SVE2-NEXT: and z1.s, z1.s, #0xff
494 ; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
497 %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
498 %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
499 %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
500 %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
501 %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
502 ret <vscale x 4 x i8> %s2
; Signed halving add on <vscale x 8 x i8>: sext to i16, add, ashr by one,
; trunc back to i8.
; Expected code: both targets sign-extend the operands with sxtb on .h lanes;
; plain SVE then does add + asr, while SVE2 emits shadd.
505 define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
506 ; SVE-LABEL: hadds_v8i8:
507 ; SVE: // %bb.0: // %entry
508 ; SVE-NEXT: ptrue p0.h
509 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
510 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
511 ; SVE-NEXT: add z0.h, z0.h, z1.h
512 ; SVE-NEXT: asr z0.h, z0.h, #1
515 ; SVE2-LABEL: hadds_v8i8:
516 ; SVE2: // %bb.0: // %entry
517 ; SVE2-NEXT: ptrue p0.h
518 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
519 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
520 ; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
523 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
524 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
525 %m = add nsw <vscale x 8 x i16> %s0s, %s1s
526 %s = ashr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
527 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
528 ret <vscale x 8 x i8> %s2
; Signed-extend halving add on <vscale x 8 x i8> that shifts the widened i16
; sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxtb on
; both operands, add, then lsr on .h lanes. No shadd is expected here, even
; with SVE2.
531 define <vscale x 8 x i8> @hadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
532 ; CHECK-LABEL: hadds_v8i8_lsh:
533 ; CHECK: // %bb.0: // %entry
534 ; CHECK-NEXT: ptrue p0.h
535 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
536 ; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
537 ; CHECK-NEXT: add z0.h, z0.h, z1.h
538 ; CHECK-NEXT: lsr z0.h, z0.h, #1
541 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
542 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
543 %m = add nsw <vscale x 8 x i16> %s0s, %s1s
544 %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
545 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
546 ret <vscale x 8 x i8> %s2
; Unsigned halving add on <vscale x 8 x i8>: zext to i16, add, lshr by one,
; trunc back to i8.
; Expected code: both targets mask the operands to 8 bits on .h lanes; plain
; SVE then does add + lsr, while SVE2 emits uhadd.
549 define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
550 ; SVE-LABEL: haddu_v8i8:
551 ; SVE: // %bb.0: // %entry
552 ; SVE-NEXT: and z0.h, z0.h, #0xff
553 ; SVE-NEXT: and z1.h, z1.h, #0xff
554 ; SVE-NEXT: add z0.h, z0.h, z1.h
555 ; SVE-NEXT: lsr z0.h, z0.h, #1
558 ; SVE2-LABEL: haddu_v8i8:
559 ; SVE2: // %bb.0: // %entry
560 ; SVE2-NEXT: ptrue p0.h
561 ; SVE2-NEXT: and z0.h, z0.h, #0xff
562 ; SVE2-NEXT: and z1.h, z1.h, #0xff
563 ; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
566 %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
567 %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
568 %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
569 %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
570 %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
571 ret <vscale x 8 x i8> %s2
; Signed halving add on <vscale x 16 x i8>: sext to i16, add, ashr by one,
; trunc back to i8.
; Expected code: plain SVE uses an asr/and/add expansion on .b lanes, while
; SVE2 emits a single predicated shadd.
574 define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
575 ; SVE-LABEL: hadds_v16i8:
576 ; SVE: // %bb.0: // %entry
577 ; SVE-NEXT: asr z2.b, z1.b, #1
578 ; SVE-NEXT: asr z3.b, z0.b, #1
579 ; SVE-NEXT: and z0.d, z0.d, z1.d
580 ; SVE-NEXT: add z1.b, z3.b, z2.b
581 ; SVE-NEXT: and z0.b, z0.b, #0x1
582 ; SVE-NEXT: add z0.b, z1.b, z0.b
585 ; SVE2-LABEL: hadds_v16i8:
586 ; SVE2: // %bb.0: // %entry
587 ; SVE2-NEXT: ptrue p0.b
588 ; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
591 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
592 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
593 %m = add nsw <vscale x 16 x i16> %s0s, %s1s
594 %s = ashr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
595 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
596 ret <vscale x 16 x i8> %s2
; Variant of the signed <vscale x 16 x i8> halving add that shifts the widened
; i16 sum with lshr instead of ashr before truncating to i8.
; Expected code matches the ashr form: asr/and/add expansion on plain SVE and
; a predicated shadd on SVE2.
599 define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
600 ; SVE-LABEL: hadds_v16i8_lsh:
601 ; SVE: // %bb.0: // %entry
602 ; SVE-NEXT: asr z2.b, z1.b, #1
603 ; SVE-NEXT: asr z3.b, z0.b, #1
604 ; SVE-NEXT: and z0.d, z0.d, z1.d
605 ; SVE-NEXT: add z1.b, z3.b, z2.b
606 ; SVE-NEXT: and z0.b, z0.b, #0x1
607 ; SVE-NEXT: add z0.b, z1.b, z0.b
610 ; SVE2-LABEL: hadds_v16i8_lsh:
611 ; SVE2: // %bb.0: // %entry
612 ; SVE2-NEXT: ptrue p0.b
613 ; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
616 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
617 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
618 %m = add nsw <vscale x 16 x i16> %s0s, %s1s
619 %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
620 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
621 ret <vscale x 16 x i8> %s2
; Unsigned halving add on <vscale x 16 x i8>: zext to i16, add, lshr by one,
; trunc back to i8.
; Expected code: plain SVE uses an lsr/and/add expansion on .b lanes, while
; SVE2 emits a single predicated uhadd.
624 define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
625 ; SVE-LABEL: haddu_v16i8:
626 ; SVE: // %bb.0: // %entry
627 ; SVE-NEXT: lsr z2.b, z1.b, #1
628 ; SVE-NEXT: lsr z3.b, z0.b, #1
629 ; SVE-NEXT: and z0.d, z0.d, z1.d
630 ; SVE-NEXT: add z1.b, z3.b, z2.b
631 ; SVE-NEXT: and z0.b, z0.b, #0x1
632 ; SVE-NEXT: add z0.b, z1.b, z0.b
635 ; SVE2-LABEL: haddu_v16i8:
636 ; SVE2: // %bb.0: // %entry
637 ; SVE2-NEXT: ptrue p0.b
638 ; SVE2-NEXT: uhadd z0.b, p0/m, z0.b, z1.b
641 %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
642 %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
643 %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
644 %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
645 %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
646 ret <vscale x 16 x i8> %s2
; Signed rounding halving add on <vscale x 2 x i64>: sext both operands to
; i128, add a splat of 1 and then the second operand, ashr by one, trunc back
; to i64.
; Expected code: plain SVE uses an asr/orr/add expansion, while SVE2 emits a
; single predicated srhadd.
649 define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
650 ; SVE-LABEL: rhadds_v2i64:
651 ; SVE: // %bb.0: // %entry
652 ; SVE-NEXT: asr z2.d, z1.d, #1
653 ; SVE-NEXT: asr z3.d, z0.d, #1
654 ; SVE-NEXT: orr z0.d, z0.d, z1.d
655 ; SVE-NEXT: add z1.d, z3.d, z2.d
656 ; SVE-NEXT: and z0.d, z0.d, #0x1
657 ; SVE-NEXT: add z0.d, z1.d, z0.d
660 ; SVE2-LABEL: rhadds_v2i64:
661 ; SVE2: // %bb.0: // %entry
662 ; SVE2-NEXT: ptrue p0.d
663 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
666 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
667 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
668 %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
669 %add2 = add <vscale x 2 x i128> %add, %s1s
670 %s = ashr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
671 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
672 ret <vscale x 2 x i64> %result
; Variant of the signed <vscale x 2 x i64> rounding halving add that shifts
; the widened i128 sum with lshr instead of ashr before truncating to i64.
; Expected code matches the ashr form: asr/orr/add expansion on plain SVE and
; a predicated srhadd on SVE2.
675 define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
676 ; SVE-LABEL: rhadds_v2i64_lsh:
677 ; SVE: // %bb.0: // %entry
678 ; SVE-NEXT: asr z2.d, z1.d, #1
679 ; SVE-NEXT: asr z3.d, z0.d, #1
680 ; SVE-NEXT: orr z0.d, z0.d, z1.d
681 ; SVE-NEXT: add z1.d, z3.d, z2.d
682 ; SVE-NEXT: and z0.d, z0.d, #0x1
683 ; SVE-NEXT: add z0.d, z1.d, z0.d
686 ; SVE2-LABEL: rhadds_v2i64_lsh:
687 ; SVE2: // %bb.0: // %entry
688 ; SVE2-NEXT: ptrue p0.d
689 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
692 %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
693 %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
694 %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
695 %add2 = add <vscale x 2 x i128> %add, %s1s
696 %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
697 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
698 ret <vscale x 2 x i64> %result
; Unsigned rounding halving add on <vscale x 2 x i64>: zext both operands to
; i128, add a splat of 1 and then the second operand, lshr by one, trunc back
; to i64.
; Expected code: plain SVE uses an lsr/orr/add expansion, while SVE2 emits a
; single predicated urhadd.
701 define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
702 ; SVE-LABEL: rhaddu_v2i64:
703 ; SVE: // %bb.0: // %entry
704 ; SVE-NEXT: lsr z2.d, z1.d, #1
705 ; SVE-NEXT: lsr z3.d, z0.d, #1
706 ; SVE-NEXT: orr z0.d, z0.d, z1.d
707 ; SVE-NEXT: add z1.d, z3.d, z2.d
708 ; SVE-NEXT: and z0.d, z0.d, #0x1
709 ; SVE-NEXT: add z0.d, z1.d, z0.d
712 ; SVE2-LABEL: rhaddu_v2i64:
713 ; SVE2: // %bb.0: // %entry
714 ; SVE2-NEXT: ptrue p0.d
715 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
718 %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
719 %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
720 %add = add nuw nsw <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
721 %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
722 %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
723 %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
724 ret <vscale x 2 x i64> %result
; Signed rounding halving add on <vscale x 2 x i32>: sext to i64, add a splat
; of 1 and then the second operand, ashr by one, trunc back to i32.
; Expected code: plain SVE sign-extends both operands, inverts one with an eor
; against all-ones and subtracts it from the other, then shifts with asr;
; SVE2 sign-extends both operands and emits srhadd on .d lanes.
727 define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
728 ; SVE-LABEL: rhadds_v2i32:
729 ; SVE: // %bb.0: // %entry
730 ; SVE-NEXT: ptrue p0.d
731 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
732 ; SVE-NEXT: sxtw z0.d, p0/m, z0.d
733 ; SVE-NEXT: sxtw z1.d, p0/m, z1.d
734 ; SVE-NEXT: eor z0.d, z0.d, z2.d
735 ; SVE-NEXT: sub z0.d, z1.d, z0.d
736 ; SVE-NEXT: asr z0.d, z0.d, #1
739 ; SVE2-LABEL: rhadds_v2i32:
740 ; SVE2: // %bb.0: // %entry
741 ; SVE2-NEXT: ptrue p0.d
742 ; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
743 ; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
744 ; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
747 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
748 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
749 %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
750 %add2 = add <vscale x 2 x i64> %add, %s1s
751 %s = ashr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
752 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
753 ret <vscale x 2 x i32> %result
; Signed-extend rounding halving add on <vscale x 2 x i32> that shifts the
; widened i64 sum with lshr instead of ashr.
; Both run lines share one expected sequence (common check prefix): sxtw on
; both operands, eor against all-ones, sub, then lsr. No srhadd is expected
; here, even with SVE2.
756 define <vscale x 2 x i32> @rhadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
757 ; CHECK-LABEL: rhadds_v2i32_lsh:
758 ; CHECK: // %bb.0: // %entry
759 ; CHECK-NEXT: ptrue p0.d
760 ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
761 ; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
762 ; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
763 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
764 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
765 ; CHECK-NEXT: lsr z0.d, z0.d, #1
768 %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
769 %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
770 %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
771 %add2 = add <vscale x 2 x i64> %add, %s1s
772 %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
773 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
774 ret <vscale x 2 x i32> %result
; Unsigned rounding halving add on <vscale x 2 x i32>: zext to i64, add a
; splat of 1 and then the second operand, lshr by one, trunc back to i32.
; Expected code: plain SVE masks both operands to 32 bits, inverts one with an
; eor against all-ones and subtracts it from the other, then shifts with lsr;
; SVE2 masks both operands and emits urhadd on .d lanes.
777 define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
778 ; SVE-LABEL: rhaddu_v2i32:
779 ; SVE: // %bb.0: // %entry
780 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
781 ; SVE-NEXT: and z0.d, z0.d, #0xffffffff
782 ; SVE-NEXT: and z1.d, z1.d, #0xffffffff
783 ; SVE-NEXT: eor z0.d, z0.d, z2.d
784 ; SVE-NEXT: sub z0.d, z1.d, z0.d
785 ; SVE-NEXT: lsr z0.d, z0.d, #1
788 ; SVE2-LABEL: rhaddu_v2i32:
789 ; SVE2: // %bb.0: // %entry
790 ; SVE2-NEXT: ptrue p0.d
791 ; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
792 ; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
793 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
796 %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
797 %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
798 %add = add nuw nsw <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
799 %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
800 %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
801 %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
802 ret <vscale x 2 x i32> %result
; Signed rounding halving add of i32 elements in .s lanes:
; trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
805 define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
806 ; SVE-LABEL: rhadds_v4i32:
807 ; SVE: // %bb.0: // %entry
808 ; SVE-NEXT: asr z2.s, z1.s, #1
809 ; SVE-NEXT: asr z3.s, z0.s, #1
810 ; SVE-NEXT: orr z0.d, z0.d, z1.d
811 ; SVE-NEXT: add z1.s, z3.s, z2.s
812 ; SVE-NEXT: and z0.s, z0.s, #0x1
813 ; SVE-NEXT: add z0.s, z1.s, z0.s
816 ; SVE2-LABEL: rhadds_v4i32:
817 ; SVE2: // %bb.0: // %entry
818 ; SVE2-NEXT: ptrue p0.s
819 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
822 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
823 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
824 %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
825 %add2 = add <vscale x 4 x i64> %add, %s1s
826 %s = ashr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
827 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
828 ret <vscale x 4 x i32> %result
; Signed rounding halving add of i32 elements where the widened i64 sum is
; shifted with lshr instead of ashr; only the low 32 bits survive the trunc.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
831 define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
832 ; SVE-LABEL: rhadds_v4i32_lsh:
833 ; SVE: // %bb.0: // %entry
834 ; SVE-NEXT: asr z2.s, z1.s, #1
835 ; SVE-NEXT: asr z3.s, z0.s, #1
836 ; SVE-NEXT: orr z0.d, z0.d, z1.d
837 ; SVE-NEXT: add z1.s, z3.s, z2.s
838 ; SVE-NEXT: and z0.s, z0.s, #0x1
839 ; SVE-NEXT: add z0.s, z1.s, z0.s
842 ; SVE2-LABEL: rhadds_v4i32_lsh:
843 ; SVE2: // %bb.0: // %entry
844 ; SVE2-NEXT: ptrue p0.s
845 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
848 %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
849 %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
850 %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
851 %add2 = add <vscale x 4 x i64> %add, %s1s
852 %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
853 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
854 ret <vscale x 4 x i32> %result
; Unsigned rounding halving add of i32 elements in .s lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using lsr.
; SVE2 expectation: a single predicated URHADD.
857 define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
858 ; SVE-LABEL: rhaddu_v4i32:
859 ; SVE: // %bb.0: // %entry
860 ; SVE-NEXT: lsr z2.s, z1.s, #1
861 ; SVE-NEXT: lsr z3.s, z0.s, #1
862 ; SVE-NEXT: orr z0.d, z0.d, z1.d
863 ; SVE-NEXT: add z1.s, z3.s, z2.s
864 ; SVE-NEXT: and z0.s, z0.s, #0x1
865 ; SVE-NEXT: add z0.s, z1.s, z0.s
868 ; SVE2-LABEL: rhaddu_v4i32:
869 ; SVE2: // %bb.0: // %entry
870 ; SVE2-NEXT: ptrue p0.s
871 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
874 %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
875 %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
876 %add = add nuw nsw <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
877 %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
878 %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
879 %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
880 ret <vscale x 4 x i32> %result
; Signed rounding halving add of i16 elements widened to i32 and held in
; .d lanes: trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; Both RUN lines share the CHECK prefix: no SRHADD is expected, only
; sign-extension (sxth), s1 - ~s0 (eor with all-ones followed by sub, which
; equals s0 + s1 + 1) and asr #1.
883 define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
884 ; CHECK-LABEL: rhadds_v2i16:
885 ; CHECK: // %bb.0: // %entry
886 ; CHECK-NEXT: ptrue p0.d
887 ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
888 ; CHECK-NEXT: sxth z0.d, p0/m, z0.d
889 ; CHECK-NEXT: sxth z1.d, p0/m, z1.d
890 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
891 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
892 ; CHECK-NEXT: asr z0.d, z0.d, #1
895 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
896 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
897 %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
898 %add2 = add <vscale x 2 x i32> %add, %s1s
899 %s = ashr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
900 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
901 ret <vscale x 2 x i16> %result
; Signed rounding halving add of i16 elements widened to i32, with the
; widened sum shifted by lshr instead of ashr.
; Both RUN lines share the CHECK prefix: no SRHADD is expected. The expected
; code sign-extends (sxth), forms s1 - ~s0 (= s0 + s1 + 1), masks the .d lane
; to its low 32 bits and then applies lsr #1.
904 define <vscale x 2 x i16> @rhadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
905 ; CHECK-LABEL: rhadds_v2i16_lsh:
906 ; CHECK: // %bb.0: // %entry
907 ; CHECK-NEXT: ptrue p0.d
908 ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
909 ; CHECK-NEXT: sxth z0.d, p0/m, z0.d
910 ; CHECK-NEXT: sxth z1.d, p0/m, z1.d
911 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
912 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
913 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
914 ; CHECK-NEXT: lsr z0.d, z0.d, #1
917 %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
918 %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
919 %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
920 %add2 = add <vscale x 2 x i32> %add, %s1s
921 %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
922 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
923 ret <vscale x 2 x i16> %result
; Unsigned rounding halving add of i16 elements held in .d lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: mask both inputs to 16 bits, then s1 - ~s0 (= s0 + s1 + 1)
; and lsr #1. SVE2 expectation: mask both inputs, then a predicated URHADD.
926 define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
927 ; SVE-LABEL: rhaddu_v2i16:
928 ; SVE: // %bb.0: // %entry
929 ; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
930 ; SVE-NEXT: and z0.d, z0.d, #0xffff
931 ; SVE-NEXT: and z1.d, z1.d, #0xffff
932 ; SVE-NEXT: eor z0.d, z0.d, z2.d
933 ; SVE-NEXT: sub z0.d, z1.d, z0.d
934 ; SVE-NEXT: lsr z0.d, z0.d, #1
937 ; SVE2-LABEL: rhaddu_v2i16:
938 ; SVE2: // %bb.0: // %entry
939 ; SVE2-NEXT: ptrue p0.d
940 ; SVE2-NEXT: and z0.d, z0.d, #0xffff
941 ; SVE2-NEXT: and z1.d, z1.d, #0xffff
942 ; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
945 %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
946 %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
947 %add = add nuw nsw <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
948 %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
949 %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
950 %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
951 ret <vscale x 2 x i16> %result
; Signed rounding halving add of i16 elements held in .s lanes:
; trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; SVE expectation: sign-extend (sxth), s1 - ~s0 (= s0 + s1 + 1), asr #1.
; SVE2 expectation: sign-extend both inputs, then a predicated SRHADD.
954 define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
955 ; SVE-LABEL: rhadds_v4i16:
956 ; SVE: // %bb.0: // %entry
957 ; SVE-NEXT: ptrue p0.s
958 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
959 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
960 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
961 ; SVE-NEXT: eor z0.d, z0.d, z2.d
962 ; SVE-NEXT: sub z0.s, z1.s, z0.s
963 ; SVE-NEXT: asr z0.s, z0.s, #1
966 ; SVE2-LABEL: rhadds_v4i16:
967 ; SVE2: // %bb.0: // %entry
968 ; SVE2-NEXT: ptrue p0.s
969 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
970 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
971 ; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
974 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
975 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
976 %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
977 %add2 = add <vscale x 4 x i32> %add, %s1s
978 %s = ashr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
979 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
980 ret <vscale x 4 x i16> %result
; Signed rounding halving add of i16 elements held in .s lanes, with the
; widened i32 sum shifted by lshr instead of ashr.
; Both RUN lines share the CHECK prefix: no SRHADD is expected, only
; sign-extension (sxth), s1 - ~s0 (= s0 + s1 + 1) and lsr #1.
983 define <vscale x 4 x i16> @rhadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
984 ; CHECK-LABEL: rhadds_v4i16_lsh:
985 ; CHECK: // %bb.0: // %entry
986 ; CHECK-NEXT: ptrue p0.s
987 ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
988 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
989 ; CHECK-NEXT: sxth z1.s, p0/m, z1.s
990 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
991 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
992 ; CHECK-NEXT: lsr z0.s, z0.s, #1
995 %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
996 %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
997 %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
998 %add2 = add <vscale x 4 x i32> %add, %s1s
999 %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
1000 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
1001 ret <vscale x 4 x i16> %result
; Unsigned rounding halving add of i16 elements held in .s lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: mask both inputs to 16 bits, then s1 - ~s0 (= s0 + s1 + 1)
; and lsr #1. SVE2 expectation: mask both inputs, then a predicated URHADD.
1004 define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
1005 ; SVE-LABEL: rhaddu_v4i16:
1006 ; SVE: // %bb.0: // %entry
1007 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1008 ; SVE-NEXT: and z0.s, z0.s, #0xffff
1009 ; SVE-NEXT: and z1.s, z1.s, #0xffff
1010 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1011 ; SVE-NEXT: sub z0.s, z1.s, z0.s
1012 ; SVE-NEXT: lsr z0.s, z0.s, #1
1015 ; SVE2-LABEL: rhaddu_v4i16:
1016 ; SVE2: // %bb.0: // %entry
1017 ; SVE2-NEXT: ptrue p0.s
1018 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
1019 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
1020 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
1023 %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
1024 %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
1025 %add = add nuw nsw <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
1026 %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
1027 %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
1028 %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
1029 ret <vscale x 4 x i16> %result
; Signed rounding halving add of i16 elements in .h lanes:
; trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
1032 define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1033 ; SVE-LABEL: rhadds_v8i16:
1034 ; SVE: // %bb.0: // %entry
1035 ; SVE-NEXT: asr z2.h, z1.h, #1
1036 ; SVE-NEXT: asr z3.h, z0.h, #1
1037 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1038 ; SVE-NEXT: add z1.h, z3.h, z2.h
1039 ; SVE-NEXT: and z0.h, z0.h, #0x1
1040 ; SVE-NEXT: add z0.h, z1.h, z0.h
1043 ; SVE2-LABEL: rhadds_v8i16:
1044 ; SVE2: // %bb.0: // %entry
1045 ; SVE2-NEXT: ptrue p0.h
1046 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1049 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1050 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1051 %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1052 %add2 = add <vscale x 8 x i32> %add, %s1s
1053 %s = ashr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1054 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1055 ret <vscale x 8 x i16> %result
; Signed rounding halving add of i16 elements where the widened i32 sum is
; shifted with lshr instead of ashr; only the low 16 bits survive the trunc.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
1058 define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1059 ; SVE-LABEL: rhadds_v8i16_lsh:
1060 ; SVE: // %bb.0: // %entry
1061 ; SVE-NEXT: asr z2.h, z1.h, #1
1062 ; SVE-NEXT: asr z3.h, z0.h, #1
1063 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1064 ; SVE-NEXT: add z1.h, z3.h, z2.h
1065 ; SVE-NEXT: and z0.h, z0.h, #0x1
1066 ; SVE-NEXT: add z0.h, z1.h, z0.h
1069 ; SVE2-LABEL: rhadds_v8i16_lsh:
1070 ; SVE2: // %bb.0: // %entry
1071 ; SVE2-NEXT: ptrue p0.h
1072 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1075 %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1076 %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1077 %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1078 %add2 = add <vscale x 8 x i32> %add, %s1s
1079 %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1080 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1081 ret <vscale x 8 x i16> %result
; Unsigned rounding halving add of i16 elements in .h lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using lsr.
; SVE2 expectation: a single predicated URHADD.
1084 define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
1085 ; SVE-LABEL: rhaddu_v8i16:
1086 ; SVE: // %bb.0: // %entry
1087 ; SVE-NEXT: lsr z2.h, z1.h, #1
1088 ; SVE-NEXT: lsr z3.h, z0.h, #1
1089 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1090 ; SVE-NEXT: add z1.h, z3.h, z2.h
1091 ; SVE-NEXT: and z0.h, z0.h, #0x1
1092 ; SVE-NEXT: add z0.h, z1.h, z0.h
1095 ; SVE2-LABEL: rhaddu_v8i16:
1096 ; SVE2: // %bb.0: // %entry
1097 ; SVE2-NEXT: ptrue p0.h
1098 ; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
1101 %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
1102 %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
1103 %add = add nuw nsw <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1104 %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
1105 %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
1106 %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
1107 ret <vscale x 8 x i16> %result
; Signed rounding halving add of i8 elements widened to i16 and held in
; .s lanes: trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; Both RUN lines share the CHECK prefix: no SRHADD is expected, only
; sign-extension (sxtb), s1 - ~s0 (eor with all-ones followed by sub, which
; equals s0 + s1 + 1) and asr #1.
1110 define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1111 ; CHECK-LABEL: rhadds_v4i8:
1112 ; CHECK: // %bb.0: // %entry
1113 ; CHECK-NEXT: ptrue p0.s
1114 ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1115 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
1116 ; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
1117 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
1118 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
1119 ; CHECK-NEXT: asr z0.s, z0.s, #1
1122 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1123 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1124 %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1125 %add2 = add <vscale x 4 x i16> %add, %s1s
1126 %s = ashr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1127 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1128 ret <vscale x 4 x i8> %result
; Signed rounding halving add of i8 elements widened to i16, with the
; widened sum shifted by lshr instead of ashr.
; Both RUN lines share the CHECK prefix: no SRHADD is expected. The expected
; code sign-extends (sxtb), forms s1 - ~s0 (= s0 + s1 + 1), masks the .s lane
; to its low 16 bits and then applies lsr #1.
1131 define <vscale x 4 x i8> @rhadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1132 ; CHECK-LABEL: rhadds_v4i8_lsh:
1133 ; CHECK: // %bb.0: // %entry
1134 ; CHECK-NEXT: ptrue p0.s
1135 ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1136 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
1137 ; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
1138 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
1139 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
1140 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
1141 ; CHECK-NEXT: lsr z0.s, z0.s, #1
1144 %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1145 %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1146 %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1147 %add2 = add <vscale x 4 x i16> %add, %s1s
1148 %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1149 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1150 ret <vscale x 4 x i8> %result
; Unsigned rounding halving add of i8 elements held in .s lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: mask both inputs to 8 bits, then s1 - ~s0 (= s0 + s1 + 1)
; and lsr #1. SVE2 expectation: mask both inputs, then a predicated URHADD.
1153 define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1154 ; SVE-LABEL: rhaddu_v4i8:
1155 ; SVE: // %bb.0: // %entry
1156 ; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1157 ; SVE-NEXT: and z0.s, z0.s, #0xff
1158 ; SVE-NEXT: and z1.s, z1.s, #0xff
1159 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1160 ; SVE-NEXT: sub z0.s, z1.s, z0.s
1161 ; SVE-NEXT: lsr z0.s, z0.s, #1
1164 ; SVE2-LABEL: rhaddu_v4i8:
1165 ; SVE2: // %bb.0: // %entry
1166 ; SVE2-NEXT: ptrue p0.s
1167 ; SVE2-NEXT: and z0.s, z0.s, #0xff
1168 ; SVE2-NEXT: and z1.s, z1.s, #0xff
1169 ; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
1172 %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
1173 %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
1174 %add = add nuw nsw <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1175 %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
1176 %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
1177 %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
1178 ret <vscale x 4 x i8> %result
; Signed rounding halving add of i8 elements held in .h lanes:
; trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; SVE expectation: sign-extend (sxtb), s1 - ~s0 (= s0 + s1 + 1), asr #1.
; SVE2 expectation: sign-extend both inputs, then a predicated SRHADD.
1181 define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1182 ; SVE-LABEL: rhadds_v8i8:
1183 ; SVE: // %bb.0: // %entry
1184 ; SVE-NEXT: ptrue p0.h
1185 ; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1186 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
1187 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
1188 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1189 ; SVE-NEXT: sub z0.h, z1.h, z0.h
1190 ; SVE-NEXT: asr z0.h, z0.h, #1
1193 ; SVE2-LABEL: rhadds_v8i8:
1194 ; SVE2: // %bb.0: // %entry
1195 ; SVE2-NEXT: ptrue p0.h
1196 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
1197 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
1198 ; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
1201 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1202 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1203 %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1204 %add2 = add <vscale x 8 x i16> %add, %s1s
1205 %s = ashr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1206 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1207 ret <vscale x 8 x i8> %result
; Signed rounding halving add of i8 elements held in .h lanes, with the
; widened i16 sum shifted by lshr instead of ashr.
; Both RUN lines share the CHECK prefix: no SRHADD is expected, only
; sign-extension (sxtb), s1 - ~s0 (= s0 + s1 + 1) and lsr #1.
1210 define <vscale x 8 x i8> @rhadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1211 ; CHECK-LABEL: rhadds_v8i8_lsh:
1212 ; CHECK: // %bb.0: // %entry
1213 ; CHECK-NEXT: ptrue p0.h
1214 ; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1215 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
1216 ; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
1217 ; CHECK-NEXT: eor z0.d, z0.d, z2.d
1218 ; CHECK-NEXT: sub z0.h, z1.h, z0.h
1219 ; CHECK-NEXT: lsr z0.h, z0.h, #1
1222 %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1223 %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1224 %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1225 %add2 = add <vscale x 8 x i16> %add, %s1s
1226 %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1227 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1228 ret <vscale x 8 x i8> %result
; Unsigned rounding halving add of i8 elements held in .h lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: mask both inputs to 8 bits, then s1 - ~s0 (= s0 + s1 + 1)
; and lsr #1. SVE2 expectation: mask both inputs, then a predicated URHADD.
1231 define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
1232 ; SVE-LABEL: rhaddu_v8i8:
1233 ; SVE: // %bb.0: // %entry
1234 ; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1235 ; SVE-NEXT: and z0.h, z0.h, #0xff
1236 ; SVE-NEXT: and z1.h, z1.h, #0xff
1237 ; SVE-NEXT: eor z0.d, z0.d, z2.d
1238 ; SVE-NEXT: sub z0.h, z1.h, z0.h
1239 ; SVE-NEXT: lsr z0.h, z0.h, #1
1242 ; SVE2-LABEL: rhaddu_v8i8:
1243 ; SVE2: // %bb.0: // %entry
1244 ; SVE2-NEXT: ptrue p0.h
1245 ; SVE2-NEXT: and z0.h, z0.h, #0xff
1246 ; SVE2-NEXT: and z1.h, z1.h, #0xff
1247 ; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
1250 %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
1251 %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
1252 %add = add nuw nsw <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1253 %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
1254 %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
1255 %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
1256 ret <vscale x 8 x i8> %result
; Signed rounding halving add of i8 elements in .b lanes:
; trunc(ashr(sext(s0) + 1 + sext(s1), 1)).
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
1259 define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1260 ; SVE-LABEL: rhadds_v16i8:
1261 ; SVE: // %bb.0: // %entry
1262 ; SVE-NEXT: asr z2.b, z1.b, #1
1263 ; SVE-NEXT: asr z3.b, z0.b, #1
1264 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1265 ; SVE-NEXT: add z1.b, z3.b, z2.b
1266 ; SVE-NEXT: and z0.b, z0.b, #0x1
1267 ; SVE-NEXT: add z0.b, z1.b, z0.b
1270 ; SVE2-LABEL: rhadds_v16i8:
1271 ; SVE2: // %bb.0: // %entry
1272 ; SVE2-NEXT: ptrue p0.b
1273 ; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
1276 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1277 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1278 %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1279 %add2 = add <vscale x 16 x i16> %add, %s1s
1280 %s = ashr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1281 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1282 ret <vscale x 16 x i8> %result
; Signed rounding halving add of i8 elements where the widened i16 sum is
; shifted with lshr instead of ashr; only the low 8 bits survive the trunc.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using asr.
; SVE2 expectation: a single predicated SRHADD.
1285 define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1286 ; SVE-LABEL: rhadds_v16i8_lsh:
1287 ; SVE: // %bb.0: // %entry
1288 ; SVE-NEXT: asr z2.b, z1.b, #1
1289 ; SVE-NEXT: asr z3.b, z0.b, #1
1290 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1291 ; SVE-NEXT: add z1.b, z3.b, z2.b
1292 ; SVE-NEXT: and z0.b, z0.b, #0x1
1293 ; SVE-NEXT: add z0.b, z1.b, z0.b
1296 ; SVE2-LABEL: rhadds_v16i8_lsh:
1297 ; SVE2: // %bb.0: // %entry
1298 ; SVE2-NEXT: ptrue p0.b
1299 ; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
1302 %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1303 %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1304 %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1305 %add2 = add <vscale x 16 x i16> %add, %s1s
1306 %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1307 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1308 ret <vscale x 16 x i8> %result
; Unsigned rounding halving add of i8 elements in .b lanes:
; trunc(lshr(zext(s0) + 1 + zext(s1), 1)) with nuw nsw on both adds.
; SVE expectation: (s0 >> 1) + (s1 >> 1) + ((s0 | s1) & 1) using lsr.
; SVE2 expectation: a single predicated URHADD.
1311 define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
1312 ; SVE-LABEL: rhaddu_v16i8:
1313 ; SVE: // %bb.0: // %entry
1314 ; SVE-NEXT: lsr z2.b, z1.b, #1
1315 ; SVE-NEXT: lsr z3.b, z0.b, #1
1316 ; SVE-NEXT: orr z0.d, z0.d, z1.d
1317 ; SVE-NEXT: add z1.b, z3.b, z2.b
1318 ; SVE-NEXT: and z0.b, z0.b, #0x1
1319 ; SVE-NEXT: add z0.b, z1.b, z0.b
1322 ; SVE2-LABEL: rhaddu_v16i8:
1323 ; SVE2: // %bb.0: // %entry
1324 ; SVE2-NEXT: ptrue p0.b
1325 ; SVE2-NEXT: urhadd z0.b, p0/m, z0.b, z1.b
1328 %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
1329 %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
1330 %add = add nuw nsw <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1331 %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
1332 %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
1333 %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
1334 ret <vscale x 16 x i8> %result