1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Signed halving add on <16 x i8>: `add` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vadd.i8 and vshr.s8 pair.
4 define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
5 ; CHECK-LABEL: vhadds_v16i8:
7 ; CHECK-NEXT: vadd.i8 q0, q0, q1
8 ; CHECK-NEXT: vshr.s8 q0, q0, #1
10 %add = add <16 x i8> %x, %y
11 %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned halving add on <16 x i8>: `add` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vadd.i8 and vshr.u8 pair.
14 define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
15 ; CHECK-LABEL: vhaddu_v16i8:
17 ; CHECK-NEXT: vadd.i8 q0, q0, q1
18 ; CHECK-NEXT: vshr.u8 q0, q0, #1
20 %add = add <16 x i8> %x, %y
21 %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed halving add on <8 x i16>: `add` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vadd.i16 and vshr.s16 pair.
24 define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
25 ; CHECK-LABEL: vhadds_v8i16:
27 ; CHECK-NEXT: vadd.i16 q0, q0, q1
28 ; CHECK-NEXT: vshr.s16 q0, q0, #1
30 %add = add <8 x i16> %x, %y
31 %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned halving add on <8 x i16>: `add` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vadd.i16 and vshr.u16 pair.
34 define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
35 ; CHECK-LABEL: vhaddu_v8i16:
37 ; CHECK-NEXT: vadd.i16 q0, q0, q1
38 ; CHECK-NEXT: vshr.u16 q0, q0, #1
40 %add = add <8 x i16> %x, %y
41 %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed halving add on <4 x i32>: `add` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vadd.i32 and vshr.s32 pair.
44 define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
45 ; CHECK-LABEL: vhadds_v4i32:
47 ; CHECK-NEXT: vadd.i32 q0, q0, q1
48 ; CHECK-NEXT: vshr.s32 q0, q0, #1
50 %add = add <4 x i32> %x, %y
51 %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
; Unsigned halving add on <4 x i32>: `add` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vadd.i32 and vshr.u32 pair.
54 define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
55 ; CHECK-LABEL: vhaddu_v4i32:
57 ; CHECK-NEXT: vadd.i32 q0, q0, q1
58 ; CHECK-NEXT: vshr.u32 q0, q0, #1
60 %add = add <4 x i32> %x, %y
61 %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
; Signed halving subtract on <16 x i8>: `sub` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vsub.i8 and vshr.s8 pair.
64 define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8(<16 x i8> %x, <16 x i8> %y) {
65 ; CHECK-LABEL: vhsubs_v16i8:
67 ; CHECK-NEXT: vsub.i8 q0, q0, q1
68 ; CHECK-NEXT: vshr.s8 q0, q0, #1
70 %sub = sub <16 x i8> %x, %y
71 %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned halving subtract on <16 x i8>: `sub` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vsub.i8 and vshr.u8 pair.
74 define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8(<16 x i8> %x, <16 x i8> %y) {
75 ; CHECK-LABEL: vhsubu_v16i8:
77 ; CHECK-NEXT: vsub.i8 q0, q0, q1
78 ; CHECK-NEXT: vshr.u8 q0, q0, #1
80 %sub = sub <16 x i8> %x, %y
81 %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed halving subtract on <8 x i16>: `sub` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vsub.i16 and vshr.s16 pair.
84 define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16(<8 x i16> %x, <8 x i16> %y) {
85 ; CHECK-LABEL: vhsubs_v8i16:
87 ; CHECK-NEXT: vsub.i16 q0, q0, q1
88 ; CHECK-NEXT: vshr.s16 q0, q0, #1
90 %sub = sub <8 x i16> %x, %y
91 %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned halving subtract on <8 x i16>: `sub` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vsub.i16 and vshr.u16 pair.
94 define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16(<8 x i16> %x, <8 x i16> %y) {
95 ; CHECK-LABEL: vhsubu_v8i16:
97 ; CHECK-NEXT: vsub.i16 q0, q0, q1
98 ; CHECK-NEXT: vshr.u16 q0, q0, #1
100 %sub = sub <8 x i16> %x, %y
101 %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed halving subtract on <4 x i32>: `sub` without nsw/nuw, then `ashr` by a splat of 1.
; Expected codegen keeps a separate vsub.i32 and vshr.s32 pair.
104 define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32(<4 x i32> %x, <4 x i32> %y) {
105 ; CHECK-LABEL: vhsubs_v4i32:
107 ; CHECK-NEXT: vsub.i32 q0, q0, q1
108 ; CHECK-NEXT: vshr.s32 q0, q0, #1
110 %sub = sub <4 x i32> %x, %y
111 %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
; Unsigned halving subtract on <4 x i32>: `sub` without nsw/nuw, then `lshr` by a splat of 1.
; Expected codegen keeps a separate vsub.i32 and vshr.u32 pair.
114 define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32(<4 x i32> %x, <4 x i32> %y) {
115 ; CHECK-LABEL: vhsubu_v4i32:
117 ; CHECK-NEXT: vsub.i32 q0, q0, q1
118 ; CHECK-NEXT: vshr.u32 q0, q0, #1
120 %sub = sub <4 x i32> %x, %y
121 %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
; Signed halving add on <16 x i8> where the add carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhadd.s8.
125 define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
126 ; CHECK-LABEL: vhadds_v16i8_nw:
128 ; CHECK-NEXT: vhadd.s8 q0, q0, q1
130 %add = add nsw <16 x i8> %x, %y
131 %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned halving add on <16 x i8> where the add carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhadd.u8.
134 define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
135 ; CHECK-LABEL: vhaddu_v16i8_nw:
137 ; CHECK-NEXT: vhadd.u8 q0, q0, q1
139 %add = add nuw <16 x i8> %x, %y
140 %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed halving add on <8 x i16> where the add carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhadd.s16.
143 define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
144 ; CHECK-LABEL: vhadds_v8i16_nw:
146 ; CHECK-NEXT: vhadd.s16 q0, q0, q1
148 %add = add nsw <8 x i16> %x, %y
149 %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned halving add on <8 x i16> where the add carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhadd.u16.
152 define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
153 ; CHECK-LABEL: vhaddu_v8i16_nw:
155 ; CHECK-NEXT: vhadd.u16 q0, q0, q1
157 %add = add nuw <8 x i16> %x, %y
158 %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed halving add on <4 x i32> where the add carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhadd.s32.
161 define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
162 ; CHECK-LABEL: vhadds_v4i32_nw:
164 ; CHECK-NEXT: vhadd.s32 q0, q0, q1
166 %add = add nsw <4 x i32> %x, %y
167 %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
; Unsigned halving add on <4 x i32> where the add carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhadd.u32.
170 define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
171 ; CHECK-LABEL: vhaddu_v4i32_nw:
173 ; CHECK-NEXT: vhadd.u32 q0, q0, q1
175 %add = add nuw <4 x i32> %x, %y
176 %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
; Signed halving subtract on <16 x i8> where the sub carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhsub.s8.
179 define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
180 ; CHECK-LABEL: vhsubs_v16i8_nw:
182 ; CHECK-NEXT: vhsub.s8 q0, q0, q1
184 %sub = sub nsw <16 x i8> %x, %y
185 %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned halving subtract on <16 x i8> where the sub carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhsub.u8.
188 define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
189 ; CHECK-LABEL: vhsubu_v16i8_nw:
191 ; CHECK-NEXT: vhsub.u8 q0, q0, q1
193 %sub = sub nuw <16 x i8> %x, %y
194 %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed halving subtract on <8 x i16> where the sub carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhsub.s16.
197 define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
198 ; CHECK-LABEL: vhsubs_v8i16_nw:
200 ; CHECK-NEXT: vhsub.s16 q0, q0, q1
202 %sub = sub nsw <8 x i16> %x, %y
203 %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned halving subtract on <8 x i16> where the sub carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhsub.u16.
206 define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
207 ; CHECK-LABEL: vhsubu_v8i16_nw:
209 ; CHECK-NEXT: vhsub.u16 q0, q0, q1
211 %sub = sub nuw <8 x i16> %x, %y
212 %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed halving subtract on <4 x i32> where the sub carries `nsw`, followed by `ashr` by a splat of 1.
; Expected codegen is a single vhsub.s32.
215 define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
216 ; CHECK-LABEL: vhsubs_v4i32_nw:
218 ; CHECK-NEXT: vhsub.s32 q0, q0, q1
220 %sub = sub nsw <4 x i32> %x, %y
221 %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
; Unsigned halving subtract on <4 x i32> where the sub carries `nuw`, followed by `lshr` by a splat of 1.
; Expected codegen is a single vhsub.u32.
224 define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
225 ; CHECK-LABEL: vhsubu_v4i32_nw:
227 ; CHECK-NEXT: vhsub.u32 q0, q0, q1
229 %sub = sub nuw <4 x i32> %x, %y
230 %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
; Signed rounding halving add on <16 x i8> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `ashr` by 1.
; Expected codegen is the unfused sequence vadd.i8, vmov.i8 #0x1, vadd.i8, vshr.s8.
233 define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
234 ; CHECK-LABEL: vrhadds_v16i8:
236 ; CHECK-NEXT: vadd.i8 q0, q0, q1
237 ; CHECK-NEXT: vmov.i8 q1, #0x1
238 ; CHECK-NEXT: vadd.i8 q0, q0, q1
239 ; CHECK-NEXT: vshr.s8 q0, q0, #1
241 %add = add <16 x i8> %x, %y
242 %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
243 %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned rounding halving add on <16 x i8> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `lshr` by 1.
; Expected codegen is the unfused sequence vadd.i8, vmov.i8 #0x1, vadd.i8, vshr.u8.
246 define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
247 ; CHECK-LABEL: vrhaddu_v16i8:
249 ; CHECK-NEXT: vadd.i8 q0, q0, q1
250 ; CHECK-NEXT: vmov.i8 q1, #0x1
251 ; CHECK-NEXT: vadd.i8 q0, q0, q1
252 ; CHECK-NEXT: vshr.u8 q0, q0, #1
254 %add = add <16 x i8> %x, %y
255 %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
256 %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed rounding halving add on <8 x i16> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `ashr` by 1.
; Expected codegen is the unfused sequence vadd.i16, vmov.i16 #0x1, vadd.i16, vshr.s16.
259 define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
260 ; CHECK-LABEL: vrhadds_v8i16:
262 ; CHECK-NEXT: vadd.i16 q0, q0, q1
263 ; CHECK-NEXT: vmov.i16 q1, #0x1
264 ; CHECK-NEXT: vadd.i16 q0, q0, q1
265 ; CHECK-NEXT: vshr.s16 q0, q0, #1
267 %add = add <8 x i16> %x, %y
268 %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
269 %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned rounding halving add on <8 x i16> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `lshr` by 1.
; Expected codegen is the unfused sequence vadd.i16, vmov.i16 #0x1, vadd.i16, vshr.u16.
272 define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
273 ; CHECK-LABEL: vrhaddu_v8i16:
275 ; CHECK-NEXT: vadd.i16 q0, q0, q1
276 ; CHECK-NEXT: vmov.i16 q1, #0x1
277 ; CHECK-NEXT: vadd.i16 q0, q0, q1
278 ; CHECK-NEXT: vshr.u16 q0, q0, #1
280 %add = add <8 x i16> %x, %y
281 %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
282 %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed rounding halving add on <4 x i32> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `ashr` by 1.
; Expected codegen is the unfused sequence vadd.i32, vmov.i32 #0x1, vadd.i32, vshr.s32.
285 define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
286 ; CHECK-LABEL: vrhadds_v4i32:
288 ; CHECK-NEXT: vadd.i32 q0, q0, q1
289 ; CHECK-NEXT: vmov.i32 q1, #0x1
290 ; CHECK-NEXT: vadd.i32 q0, q0, q1
291 ; CHECK-NEXT: vshr.s32 q0, q0, #1
293 %add = add <4 x i32> %x, %y
294 %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
295 %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Unsigned rounding halving add on <4 x i32> with no wrap flags: `add` of %x and %y,
; `add` of a splat of 1, then `lshr` by 1.
; Expected codegen is the unfused sequence vadd.i32, vmov.i32 #0x1, vadd.i32, vshr.u32.
298 define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
299 ; CHECK-LABEL: vrhaddu_v4i32:
301 ; CHECK-NEXT: vadd.i32 q0, q0, q1
302 ; CHECK-NEXT: vmov.i32 q1, #0x1
303 ; CHECK-NEXT: vadd.i32 q0, q0, q1
304 ; CHECK-NEXT: vshr.u32 q0, q0, #1
306 %add = add <4 x i32> %x, %y
307 %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
308 %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Signed rounding halving add on <16 x i8> with `nsw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i8, vmov.i8 #0x1, vadd.i8, vshr.s8.
311 define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
312 ; CHECK-LABEL: vrhadds_v16i8_nwop:
314 ; CHECK-NEXT: vadd.i8 q0, q0, q1
315 ; CHECK-NEXT: vmov.i8 q1, #0x1
316 ; CHECK-NEXT: vadd.i8 q0, q0, q1
317 ; CHECK-NEXT: vshr.s8 q0, q0, #1
319 %add = add nsw <16 x i8> %x, %y
320 %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
321 %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned rounding halving add on <16 x i8> with `nuw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i8, vmov.i8 #0x1, vadd.i8, vshr.u8.
324 define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
325 ; CHECK-LABEL: vrhaddu_v16i8_nwop:
327 ; CHECK-NEXT: vadd.i8 q0, q0, q1
328 ; CHECK-NEXT: vmov.i8 q1, #0x1
329 ; CHECK-NEXT: vadd.i8 q0, q0, q1
330 ; CHECK-NEXT: vshr.u8 q0, q0, #1
332 %add = add nuw <16 x i8> %x, %y
333 %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
334 %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed rounding halving add on <8 x i16> with `nsw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i16, vmov.i16 #0x1, vadd.i16, vshr.s16.
337 define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
338 ; CHECK-LABEL: vrhadds_v8i16_nwop:
340 ; CHECK-NEXT: vadd.i16 q0, q0, q1
341 ; CHECK-NEXT: vmov.i16 q1, #0x1
342 ; CHECK-NEXT: vadd.i16 q0, q0, q1
343 ; CHECK-NEXT: vshr.s16 q0, q0, #1
345 %add = add nsw <8 x i16> %x, %y
346 %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
347 %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned rounding halving add on <8 x i16> with `nuw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i16, vmov.i16 #0x1, vadd.i16, vshr.u16.
350 define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
351 ; CHECK-LABEL: vrhaddu_v8i16_nwop:
353 ; CHECK-NEXT: vadd.i16 q0, q0, q1
354 ; CHECK-NEXT: vmov.i16 q1, #0x1
355 ; CHECK-NEXT: vadd.i16 q0, q0, q1
356 ; CHECK-NEXT: vshr.u16 q0, q0, #1
358 %add = add nuw <8 x i16> %x, %y
359 %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
360 %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed rounding halving add on <4 x i32> with `nsw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i32, vmov.i32 #0x1, vadd.i32, vshr.s32.
363 define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
364 ; CHECK-LABEL: vrhadds_v4i32_nwop:
366 ; CHECK-NEXT: vadd.i32 q0, q0, q1
367 ; CHECK-NEXT: vmov.i32 q1, #0x1
368 ; CHECK-NEXT: vadd.i32 q0, q0, q1
369 ; CHECK-NEXT: vshr.s32 q0, q0, #1
371 %add = add nsw <4 x i32> %x, %y
372 %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
373 %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Unsigned rounding halving add on <4 x i32> with `nuw` only on the first add (%x + %y);
; the +1 rounding add carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is still the unfused sequence vadd.i32, vmov.i32 #0x1, vadd.i32, vshr.u32.
376 define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
377 ; CHECK-LABEL: vrhaddu_v4i32_nwop:
379 ; CHECK-NEXT: vadd.i32 q0, q0, q1
380 ; CHECK-NEXT: vmov.i32 q1, #0x1
381 ; CHECK-NEXT: vadd.i32 q0, q0, q1
382 ; CHECK-NEXT: vshr.u32 q0, q0, #1
384 %add = add nuw <4 x i32> %x, %y
385 %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
386 %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Signed rounding halving add on <16 x i8> with `nsw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is vadd.i8, vmov.i8 #0x1, then vhadd.s8 (no vrhadd).
389 define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
390 ; CHECK-LABEL: vrhadds_v16i8_nwrnd:
392 ; CHECK-NEXT: vadd.i8 q0, q0, q1
393 ; CHECK-NEXT: vmov.i8 q1, #0x1
394 ; CHECK-NEXT: vhadd.s8 q0, q0, q1
396 %add = add <16 x i8> %x, %y
397 %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
398 %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned rounding halving add on <16 x i8> with `nuw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is vadd.i8, vmov.i8 #0x1, then vhadd.u8 (no vrhadd).
401 define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
402 ; CHECK-LABEL: vrhaddu_v16i8_nwrnd:
404 ; CHECK-NEXT: vadd.i8 q0, q0, q1
405 ; CHECK-NEXT: vmov.i8 q1, #0x1
406 ; CHECK-NEXT: vhadd.u8 q0, q0, q1
408 %add = add <16 x i8> %x, %y
409 %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
410 %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed rounding halving add on <8 x i16> with `nsw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is vadd.i16, vmov.i16 #0x1, then vhadd.s16 (no vrhadd).
413 define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
414 ; CHECK-LABEL: vrhadds_v8i16_nwrnd:
416 ; CHECK-NEXT: vadd.i16 q0, q0, q1
417 ; CHECK-NEXT: vmov.i16 q1, #0x1
418 ; CHECK-NEXT: vhadd.s16 q0, q0, q1
420 %add = add <8 x i16> %x, %y
421 %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
422 %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned rounding halving add on <8 x i16> with `nuw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is vadd.i16, vmov.i16 #0x1, then vhadd.u16 (no vrhadd).
425 define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
426 ; CHECK-LABEL: vrhaddu_v8i16_nwrnd:
428 ; CHECK-NEXT: vadd.i16 q0, q0, q1
429 ; CHECK-NEXT: vmov.i16 q1, #0x1
430 ; CHECK-NEXT: vhadd.u16 q0, q0, q1
432 %add = add <8 x i16> %x, %y
433 %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
434 %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed rounding halving add on <4 x i32> with `nsw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `ashr`-ed by 1.
; Expected codegen is vadd.i32, vmov.i32 #0x1, then vhadd.s32 (no vrhadd).
437 define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
438 ; CHECK-LABEL: vrhadds_v4i32_nwrnd:
440 ; CHECK-NEXT: vadd.i32 q0, q0, q1
441 ; CHECK-NEXT: vmov.i32 q1, #0x1
442 ; CHECK-NEXT: vhadd.s32 q0, q0, q1
444 %add = add <4 x i32> %x, %y
445 %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
446 %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Unsigned rounding halving add on <4 x i32> with `nuw` only on the +1 rounding add;
; the first add (%x + %y) carries no flag, and the result is `lshr`-ed by 1.
; Expected codegen is vadd.i32, vmov.i32 #0x1, then vhadd.u32 (no vrhadd).
449 define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
450 ; CHECK-LABEL: vrhaddu_v4i32_nwrnd:
452 ; CHECK-NEXT: vadd.i32 q0, q0, q1
453 ; CHECK-NEXT: vmov.i32 q1, #0x1
454 ; CHECK-NEXT: vhadd.u32 q0, q0, q1
456 %add = add <4 x i32> %x, %y
457 %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
458 %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Signed rounding halving add on <16 x i8> with `nsw` on both the %x + %y add and the +1
; rounding add, followed by `ashr` by 1. Expected codegen is a single vrhadd.s8.
461 define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
462 ; CHECK-LABEL: vrhadds_v16i8_both_nw:
464 ; CHECK-NEXT: vrhadd.s8 q0, q0, q1
466 %add = add nsw <16 x i8> %x, %y
467 %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
468 %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Unsigned rounding halving add on <16 x i8> with `nuw` on both the %x + %y add and the +1
; rounding add, followed by `lshr` by 1. Expected codegen is a single vrhadd.u8.
471 define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
472 ; CHECK-LABEL: vrhaddu_v16i8_both_nw:
474 ; CHECK-NEXT: vrhadd.u8 q0, q0, q1
476 %add = add nuw <16 x i8> %x, %y
477 %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
478 %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Signed rounding halving add on <8 x i16> with `nsw` on both the %x + %y add and the +1
; rounding add, followed by `ashr` by 1. Expected codegen is a single vrhadd.s16.
481 define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
482 ; CHECK-LABEL: vrhadds_v8i16_both_nw:
484 ; CHECK-NEXT: vrhadd.s16 q0, q0, q1
486 %add = add nsw <8 x i16> %x, %y
487 %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
488 %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Unsigned rounding halving add on <8 x i16> with `nuw` on both the %x + %y add and the +1
; rounding add, followed by `lshr` by 1. Expected codegen is a single vrhadd.u16.
491 define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
492 ; CHECK-LABEL: vrhaddu_v8i16_both_nw:
494 ; CHECK-NEXT: vrhadd.u16 q0, q0, q1
496 %add = add nuw <8 x i16> %x, %y
497 %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
498 %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Signed rounding halving add on <4 x i32> with `nsw` on both the %x + %y add and the +1
; rounding add, followed by `ashr` by 1. Expected codegen is a single vrhadd.s32.
501 define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
502 ; CHECK-LABEL: vrhadds_v4i32_both_nw:
504 ; CHECK-NEXT: vrhadd.s32 q0, q0, q1
506 %add = add nsw <4 x i32> %x, %y
507 %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
508 %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
; Unsigned rounding halving add on <4 x i32> with `nuw` on both the %x + %y add and the +1
; rounding add, followed by `lshr` by 1. Expected codegen is a single vrhadd.u32.
511 define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
512 ; CHECK-LABEL: vrhaddu_v4i32_both_nw:
514 ; CHECK-NEXT: vrhadd.u32 q0, q0, q1
516 %add = add nuw <4 x i32> %x, %y
517 %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
518 %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>