1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; Select-before-op pattern: lanes where the predicate %c (from
; @llvm.arm.mve.vctp32 of %n) is false pick zero instead of %y. Zero is the
; identity for add, so those lanes of the result are just %x. The expected
; assembly is a vctp.32 followed by a single predicated vaddt.i32, with no
; separate select.
4 define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
5 ; CHECK-LABEL: add_v4i32_x:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vctp.32 r0
9 ; CHECK-NEXT: vaddt.i32 q0, q0, q1
12 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
13 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
14 %b = add <4 x i32> %a, %x
18 define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
19 ; CHECK-LABEL: add_v8i16_x:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vctp.16 r0
23 ; CHECK-NEXT: vaddt.i16 q0, q0, q1
26 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
27 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
28 %b = add <8 x i16> %a, %x
32 define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
33 ; CHECK-LABEL: add_v16i8_x:
34 ; CHECK: @ %bb.0: @ %entry
35 ; CHECK-NEXT: vctp.8 r0
37 ; CHECK-NEXT: vaddt.i8 q0, q0, q1
40 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
41 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
42 %b = add <16 x i8> %a, %x
; Subtraction is not commutative, so the select sits on the subtrahend only:
; inactive lanes compute %x - 0, which is %x. The expected assembly is a
; vctp.32 followed by a predicated vsubt.i32.
46 define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
47 ; CHECK-LABEL: sub_v4i32_x:
48 ; CHECK: @ %bb.0: @ %entry
49 ; CHECK-NEXT: vctp.32 r0
51 ; CHECK-NEXT: vsubt.i32 q0, q0, q1
54 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
55 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
56 %b = sub <4 x i32> %x, %a
60 define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
61 ; CHECK-LABEL: sub_v8i16_x:
62 ; CHECK: @ %bb.0: @ %entry
63 ; CHECK-NEXT: vctp.16 r0
65 ; CHECK-NEXT: vsubt.i16 q0, q0, q1
68 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
69 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
70 %b = sub <8 x i16> %x, %a
74 define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
75 ; CHECK-LABEL: sub_v16i8_x:
76 ; CHECK: @ %bb.0: @ %entry
77 ; CHECK-NEXT: vctp.8 r0
79 ; CHECK-NEXT: vsubt.i8 q0, q0, q1
82 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
83 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
84 %b = sub <16 x i8> %x, %a
; Same shape as the add tests, but the false value of the select is a splat
; of 1, the identity for mul. The expected assembly is a vctp.32 followed by a
; predicated vmult.i32.
88 define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
89 ; CHECK-LABEL: mul_v4i32_x:
90 ; CHECK: @ %bb.0: @ %entry
91 ; CHECK-NEXT: vctp.32 r0
93 ; CHECK-NEXT: vmult.i32 q0, q0, q1
96 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
97 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
98 %b = mul <4 x i32> %a, %x
102 define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
103 ; CHECK-LABEL: mul_v8i16_x:
104 ; CHECK: @ %bb.0: @ %entry
105 ; CHECK-NEXT: vctp.16 r0
107 ; CHECK-NEXT: vmult.i16 q0, q0, q1
110 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
111 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
112 %b = mul <8 x i16> %a, %x
116 define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
117 ; CHECK-LABEL: mul_v16i8_x:
118 ; CHECK: @ %bb.0: @ %entry
119 ; CHECK-NEXT: vctp.8 r0
121 ; CHECK-NEXT: vmult.i8 q0, q0, q1
124 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
125 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
126 %b = mul <16 x i8> %a, %x
; The false value of the select is a splat of -1 (all bits set), the identity
; for and, so inactive lanes yield %x. The expected assembly is a vctp.32
; followed by a predicated vandt (no element-size suffix on the bitwise op).
130 define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
131 ; CHECK-LABEL: and_v4i32_x:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: vctp.32 r0
135 ; CHECK-NEXT: vandt q0, q0, q1
138 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
139 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
140 %b = and <4 x i32> %a, %x
144 define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
145 ; CHECK-LABEL: and_v8i16_x:
146 ; CHECK: @ %bb.0: @ %entry
147 ; CHECK-NEXT: vctp.16 r0
149 ; CHECK-NEXT: vandt q0, q0, q1
152 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
153 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
154 %b = and <8 x i16> %a, %x
158 define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
159 ; CHECK-LABEL: and_v16i8_x:
160 ; CHECK: @ %bb.0: @ %entry
161 ; CHECK-NEXT: vctp.8 r0
163 ; CHECK-NEXT: vandt q0, q0, q1
166 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
167 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
168 %b = and <16 x i8> %a, %x
172 define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
173 ; CHECK-LABEL: or_v4i32_x:
174 ; CHECK: @ %bb.0: @ %entry
175 ; CHECK-NEXT: vctp.32 r0
177 ; CHECK-NEXT: vorrt q0, q0, q1
180 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
181 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
182 %b = or <4 x i32> %a, %x
186 define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
187 ; CHECK-LABEL: or_v8i16_x:
188 ; CHECK: @ %bb.0: @ %entry
189 ; CHECK-NEXT: vctp.16 r0
191 ; CHECK-NEXT: vorrt q0, q0, q1
194 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
195 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
196 %b = or <8 x i16> %a, %x
200 define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
201 ; CHECK-LABEL: or_v16i8_x:
202 ; CHECK: @ %bb.0: @ %entry
203 ; CHECK-NEXT: vctp.8 r0
205 ; CHECK-NEXT: vorrt q0, q0, q1
208 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
209 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
210 %b = or <16 x i8> %a, %x
214 define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
215 ; CHECK-LABEL: xor_v4i32_x:
216 ; CHECK: @ %bb.0: @ %entry
217 ; CHECK-NEXT: vctp.32 r0
219 ; CHECK-NEXT: veort q0, q0, q1
222 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
223 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
224 %b = xor <4 x i32> %a, %x
228 define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
229 ; CHECK-LABEL: xor_v8i16_x:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: vctp.16 r0
233 ; CHECK-NEXT: veort q0, q0, q1
236 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
237 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
238 %b = xor <8 x i16> %a, %x
242 define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
243 ; CHECK-LABEL: xor_v16i8_x:
244 ; CHECK: @ %bb.0: @ %entry
245 ; CHECK-NEXT: vctp.8 r0
247 ; CHECK-NEXT: veort q0, q0, q1
250 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
251 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
252 %b = xor <16 x i8> %a, %x
; The select is on the shift amount: inactive lanes shift %x by zero, leaving
; it unchanged. The expected assembly is a vctp.32 followed by a predicated
; vshlt.u32 taking the amounts from q1.
256 define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
257 ; CHECK-LABEL: shl_v4i32_x:
258 ; CHECK: @ %bb.0: @ %entry
259 ; CHECK-NEXT: vctp.32 r0
261 ; CHECK-NEXT: vshlt.u32 q0, q0, q1
264 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
265 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
266 %b = shl <4 x i32> %x, %a
270 define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
271 ; CHECK-LABEL: shl_v8i16_x:
272 ; CHECK: @ %bb.0: @ %entry
273 ; CHECK-NEXT: vctp.16 r0
275 ; CHECK-NEXT: vshlt.u16 q0, q0, q1
278 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
279 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
280 %b = shl <8 x i16> %x, %a
284 define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
285 ; CHECK-LABEL: shl_v16i8_x:
286 ; CHECK: @ %bb.0: @ %entry
287 ; CHECK-NEXT: vctp.8 r0
289 ; CHECK-NEXT: vshlt.u8 q0, q0, q1
292 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
293 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
294 %b = shl <16 x i8> %x, %a
; Arithmetic right shift by a vector amount, with zero selected for inactive
; lanes. The expected assembly first negates the shift amounts
; (vneg.s32 q1, q1) and then uses a predicated signed vshlt.s32, i.e. the
; right shift is emitted as a left shift by the negated amount.
; NOTE(review): this presumably relies on the vector-shift-by-register
; instruction treating negative per-lane amounts as right shifts - confirm
; against the Arm MVE reference.
298 define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
299 ; CHECK-LABEL: ashr_v4i32_x:
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: vneg.s32 q1, q1
302 ; CHECK-NEXT: vctp.32 r0
304 ; CHECK-NEXT: vshlt.s32 q0, q0, q1
307 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
308 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
309 %b = ashr <4 x i32> %x, %a
313 define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
314 ; CHECK-LABEL: ashr_v8i16_x:
315 ; CHECK: @ %bb.0: @ %entry
316 ; CHECK-NEXT: vneg.s16 q1, q1
317 ; CHECK-NEXT: vctp.16 r0
319 ; CHECK-NEXT: vshlt.s16 q0, q0, q1
322 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
323 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
324 %b = ashr <8 x i16> %x, %a
328 define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
329 ; CHECK-LABEL: ashr_v16i8_x:
330 ; CHECK: @ %bb.0: @ %entry
331 ; CHECK-NEXT: vneg.s8 q1, q1
332 ; CHECK-NEXT: vctp.8 r0
334 ; CHECK-NEXT: vshlt.s8 q0, q0, q1
337 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
338 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
339 %b = ashr <16 x i8> %x, %a
; Logical right shift by a vector amount, with zero selected for inactive
; lanes. As in the ashr tests, the expected assembly negates the amounts with
; vneg.s32 and then issues a predicated vshlt; here it is the unsigned
; (.u32) form.
343 define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
344 ; CHECK-LABEL: lshr_v4i32_x:
345 ; CHECK: @ %bb.0: @ %entry
346 ; CHECK-NEXT: vneg.s32 q1, q1
347 ; CHECK-NEXT: vctp.32 r0
349 ; CHECK-NEXT: vshlt.u32 q0, q0, q1
352 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
353 %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
354 %b = lshr <4 x i32> %x, %a
358 define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
359 ; CHECK-LABEL: lshr_v8i16_x:
360 ; CHECK: @ %bb.0: @ %entry
361 ; CHECK-NEXT: vneg.s16 q1, q1
362 ; CHECK-NEXT: vctp.16 r0
364 ; CHECK-NEXT: vshlt.u16 q0, q0, q1
367 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
368 %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
369 %b = lshr <8 x i16> %x, %a
373 define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
374 ; CHECK-LABEL: lshr_v16i8_x:
375 ; CHECK: @ %bb.0: @ %entry
376 ; CHECK-NEXT: vneg.s8 q1, q1
377 ; CHECK-NEXT: vctp.8 r0
379 ; CHECK-NEXT: vshlt.u8 q0, q0, q1
382 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
383 %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
384 %b = lshr <16 x i8> %x, %a
; and-with-complement: %y is inverted (xor with all-ones) before the select,
; and the select's false value is all-ones so inactive lanes yield %x. The
; expected assembly combines the inversion, the select and the and into a
; single predicated vbict after the vctp.32.
388 define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
389 ; CHECK-LABEL: andnot_v4i32_x:
390 ; CHECK: @ %bb.0: @ %entry
391 ; CHECK-NEXT: vctp.32 r0
393 ; CHECK-NEXT: vbict q0, q0, q1
396 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
397 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
398 %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
399 %b = and <4 x i32> %a, %x
403 define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
404 ; CHECK-LABEL: andnot_v8i16_x:
405 ; CHECK: @ %bb.0: @ %entry
406 ; CHECK-NEXT: vctp.16 r0
408 ; CHECK-NEXT: vbict q0, q0, q1
411 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
412 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
413 %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
414 %b = and <8 x i16> %a, %x
418 define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
419 ; CHECK-LABEL: andnot_v16i8_x:
420 ; CHECK: @ %bb.0: @ %entry
421 ; CHECK-NEXT: vctp.8 r0
423 ; CHECK-NEXT: vbict q0, q0, q1
426 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
427 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
428 %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
429 %b = and <16 x i8> %a, %x
; or-with-complement: %y is inverted (xor with all-ones) before the select,
; and the select's false value is zero so inactive lanes yield %x. The
; expected assembly combines everything into a single predicated vornt after
; the vctp.32.
433 define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
434 ; CHECK-LABEL: ornot_v4i32_x:
435 ; CHECK: @ %bb.0: @ %entry
436 ; CHECK-NEXT: vctp.32 r0
438 ; CHECK-NEXT: vornt q0, q0, q1
441 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
442 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
443 %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> zeroinitializer
444 %b = or <4 x i32> %a, %x
448 define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
449 ; CHECK-LABEL: ornot_v8i16_x:
450 ; CHECK: @ %bb.0: @ %entry
451 ; CHECK-NEXT: vctp.16 r0
453 ; CHECK-NEXT: vornt q0, q0, q1
456 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
457 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
458 %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> zeroinitializer
459 %b = or <8 x i16> %a, %x
463 define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
464 ; CHECK-LABEL: ornot_v16i8_x:
465 ; CHECK: @ %bb.0: @ %entry
466 ; CHECK-NEXT: vctp.8 r0
468 ; CHECK-NEXT: vornt q0, q0, q1
471 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
472 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
473 %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> zeroinitializer
474 %b = or <16 x i8> %a, %x
; Floating-point add with -0.0 as the select's false value. -0.0 is the exact
; identity for fadd (x + -0.0 is x for every x, including +0.0), so no
; fast-math flags are needed and the expected assembly is a vctp.32 followed
; by a predicated vaddt.f32.
478 define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
479 ; CHECK-LABEL: fadd_v4f32_x:
480 ; CHECK: @ %bb.0: @ %entry
481 ; CHECK-NEXT: vctp.32 r0
483 ; CHECK-NEXT: vaddt.f32 q0, q0, q1
486 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
487 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
488 %b = fadd <4 x float> %a, %x
; Negative test: the false value is +0.0 and the fadd carries no nsz flag.
; +0.0 is not an identity in that case (-0.0 + +0.0 is +0.0), so the select
; must not be folded into the add. The expected assembly keeps the select as
; a zero splat (vmov.i32 q2, #0x0) merged with %y by vmovt, followed by an
; unpredicated vadd.f32.
492 define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x2(<4 x float> %x, <4 x float> %y, i32 %n) {
493 ; CHECK-LABEL: fadd_v4f32_x2:
494 ; CHECK: @ %bb.0: @ %entry
495 ; CHECK-NEXT: vmov.i32 q2, #0x0
496 ; CHECK-NEXT: vctp.32 r0
498 ; CHECK-NEXT: vmovt q2, q1
499 ; CHECK-NEXT: vadd.f32 q0, q2, q0
502 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
503 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
504 %b = fadd <4 x float> %a, %x
; Same IR as fadd_v4f32_x2 except that the fadd is marked nsz. With the sign
; of zero declared irrelevant, +0.0 is acceptable as an identity and the
; expected assembly is again a vctp.32 followed by a predicated vaddt.f32.
508 define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x3(<4 x float> %x, <4 x float> %y, i32 %n) {
509 ; CHECK-LABEL: fadd_v4f32_x3:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vctp.32 r0
513 ; CHECK-NEXT: vaddt.f32 q0, q0, q1
516 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
517 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
518 %b = fadd nsz <4 x float> %a, %x
; Half-precision version of fadd_v4f32_x. The false value 0xH8000 is the half
; bit pattern with only the sign bit set, i.e. -0.0, the exact identity for
; fadd. The expected assembly is a vctp.16 followed by a predicated
; vaddt.f16.
522 define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
523 ; CHECK-LABEL: fadd_v8f16_x:
524 ; CHECK: @ %bb.0: @ %entry
525 ; CHECK-NEXT: vctp.16 r0
527 ; CHECK-NEXT: vaddt.f16 q0, q0, q1
530 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
531 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
532 %b = fadd <8 x half> %a, %x
; Half-precision negative test: the false value is a zero splat and the fadd
; has no nsz flag, so the expected assembly keeps the select (zero splat in
; q2 merged with %y by vmovt) and uses an unpredicated vadd.f16.
; NOTE(review): the splat mixes the spellings `0x0000` and `0x00000` and does
; not use the 0xH half form seen elsewhere in this file; presumably every lane
; is meant to be +0.0 - consider a uniform spelling when the test is next
; touched.
536 define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x2(<8 x half> %x, <8 x half> %y, i32 %n) {
537 ; CHECK-LABEL: fadd_v8f16_x2:
538 ; CHECK: @ %bb.0: @ %entry
539 ; CHECK-NEXT: vmov.i32 q2, #0x0
540 ; CHECK-NEXT: vctp.16 r0
542 ; CHECK-NEXT: vmovt q2, q1
543 ; CHECK-NEXT: vadd.f16 q0, q2, q0
546 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
547 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0x0000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000>
548 %b = fadd <8 x half> %a, %x
; Same IR as fadd_v8f16_x2 except that the fadd is marked nsz, which makes the
; zero splat usable as an identity. The expected assembly is a vctp.16
; followed by a predicated vaddt.f16.
; NOTE(review): the splat mixes the spellings `0x0000` and `0x00000`;
; presumably every lane is meant to be +0.0 - consider a uniform spelling
; when the test is next touched.
552 define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x3(<8 x half> %x, <8 x half> %y, i32 %n) {
553 ; CHECK-LABEL: fadd_v8f16_x3:
554 ; CHECK: @ %bb.0: @ %entry
555 ; CHECK-NEXT: vctp.16 r0
557 ; CHECK-NEXT: vaddt.f16 q0, q0, q1
560 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
561 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0x0000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000, half 0x00000>
562 %b = fadd nsz <8 x half> %a, %x
; Floating-point subtract with +0.0 (zeroinitializer) selected for inactive
; lanes of the subtrahend. Unlike fadd, x - (+0.0) is x for every x, so no
; nsz flag is needed. The expected assembly is a vctp.32 followed by a
; predicated vsubt.f32.
566 define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
567 ; CHECK-LABEL: fsub_v4f32_x:
568 ; CHECK: @ %bb.0: @ %entry
569 ; CHECK-NEXT: vctp.32 r0
571 ; CHECK-NEXT: vsubt.f32 q0, q0, q1
574 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
575 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> zeroinitializer
576 %b = fsub <4 x float> %x, %a
580 define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
581 ; CHECK-LABEL: fsub_v8f16_x:
582 ; CHECK: @ %bb.0: @ %entry
583 ; CHECK-NEXT: vctp.16 r0
585 ; CHECK-NEXT: vsubt.f16 q0, q0, q1
588 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
589 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> zeroinitializer
590 %b = fsub <8 x half> %x, %a
594 define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
595 ; CHECK-LABEL: fmul_v4f32_x:
596 ; CHECK: @ %bb.0: @ %entry
597 ; CHECK-NEXT: vctp.32 r0
599 ; CHECK-NEXT: vmult.f32 q0, q0, q1
602 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
603 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
604 %b = fmul <4 x float> %a, %x
608 define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
609 ; CHECK-LABEL: fmul_v8f16_x:
610 ; CHECK: @ %bb.0: @ %entry
611 ; CHECK-NEXT: vctp.16 r0
613 ; CHECK-NEXT: vmult.f16 q0, q0, q1
616 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
617 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
618 %b = fmul <8 x half> %a, %x
; Floating-point divide with 1.0 selected for inactive lanes of the divisor.
; The expected assembly contains no vector divide: it performs four scalar
; vdiv.f32 operations on the s-register lanes, writing the quotients into
; s4-s7, and then merges them into q0 under the predicate with vmovt q0, q1.
622 define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
623 ; CHECK-LABEL: fdiv_v4f32_x:
624 ; CHECK: @ %bb.0: @ %entry
625 ; CHECK-NEXT: vdiv.f32 s7, s3, s7
626 ; CHECK-NEXT: vctp.32 r0
627 ; CHECK-NEXT: vdiv.f32 s6, s2, s6
628 ; CHECK-NEXT: vdiv.f32 s5, s1, s5
629 ; CHECK-NEXT: vdiv.f32 s4, s0, s4
631 ; CHECK-NEXT: vmovt q0, q1
634 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
635 %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
636 %b = fdiv <4 x float> %x, %a
; Half-precision divide with 1.0 (0xH3C00) selected for inactive lanes of the
; divisor. As in fdiv_v4f32_x there is no vector divide in the expected
; assembly: it issues eight scalar vdiv.f16 operations, using vmovx.f16 and
; vins.f16 to move values between register halves, and finally merges the
; quotients into q0 under the vctp.16 predicate with vmovt q0, q1.
640 define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
641 ; CHECK-LABEL: fdiv_v8f16_x:
642 ; CHECK: @ %bb.0: @ %entry
643 ; CHECK-NEXT: vmovx.f16 s8, s4
644 ; CHECK-NEXT: vmovx.f16 s10, s0
645 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
646 ; CHECK-NEXT: vdiv.f16 s4, s0, s4
647 ; CHECK-NEXT: vins.f16 s4, s8
648 ; CHECK-NEXT: vmovx.f16 s8, s5
649 ; CHECK-NEXT: vmovx.f16 s10, s1
650 ; CHECK-NEXT: vdiv.f16 s5, s1, s5
651 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
652 ; CHECK-NEXT: vmovx.f16 s10, s2
653 ; CHECK-NEXT: vins.f16 s5, s8
654 ; CHECK-NEXT: vmovx.f16 s8, s6
655 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
656 ; CHECK-NEXT: vdiv.f16 s6, s2, s6
657 ; CHECK-NEXT: vins.f16 s6, s8
658 ; CHECK-NEXT: vmovx.f16 s8, s7
659 ; CHECK-NEXT: vmovx.f16 s10, s3
660 ; CHECK-NEXT: vdiv.f16 s7, s3, s7
661 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
662 ; CHECK-NEXT: vctp.16 r0
663 ; CHECK-NEXT: vins.f16 s7, s8
665 ; CHECK-NEXT: vmovt q0, q1
668 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
669 %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
670 %b = fdiv <8 x half> %x, %a
; @llvm.fma with the select on the addend: inactive lanes use -0.0 instead of
; %x, so they compute fma(%y, %z, -0.0) rather than returning %x. The expected
; assembly therefore does not use a predicated fma; it materialises the -0.0
; splat (vmov.i32 q3, #0x80000000), merges %x into it with vmovt, issues an
; unpredicated vfma.f32 into q3 and copies the result back to q0.
674 define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
675 ; CHECK-LABEL: fmai_v4f32_x:
676 ; CHECK: @ %bb.0: @ %entry
677 ; CHECK-NEXT: vmov.i32 q3, #0x80000000
678 ; CHECK-NEXT: vctp.32 r0
680 ; CHECK-NEXT: vmovt q3, q0
681 ; CHECK-NEXT: vfma.f32 q3, q1, q2
682 ; CHECK-NEXT: vmov q0, q3
685 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
686 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
687 %b = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %a)
691 define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
692 ; CHECK-LABEL: fmai_v8f16_x:
693 ; CHECK: @ %bb.0: @ %entry
694 ; CHECK-NEXT: vmov.i16 q3, #0x8000
695 ; CHECK-NEXT: vctp.16 r0
697 ; CHECK-NEXT: vmovt q3, q0
698 ; CHECK-NEXT: vfma.f16 q3, q1, q2
699 ; CHECK-NEXT: vmov q0, q3
702 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
703 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
704 %b = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %a)
; Multiply-add written as separate fast-math operations: %m = %y * %z, a
; select that replaces %m with -0.0 in inactive lanes, then a fast fadd with
; %x. Inactive lanes thus yield %x. The expected assembly fuses the whole
; sequence into a single predicated vfmat.f32 after the vctp.32.
708 define arm_aapcs_vfpcc <4 x float> @fma_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
709 ; CHECK-LABEL: fma_v4f32_x:
710 ; CHECK: @ %bb.0: @ %entry
711 ; CHECK-NEXT: vctp.32 r0
713 ; CHECK-NEXT: vfmat.f32 q0, q1, q2
716 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
717 %m = fmul fast <4 x float> %y, %z
718 %a = select <4 x i1> %c, <4 x float> %m, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
719 %b = fadd fast <4 x float> %a, %x
723 define arm_aapcs_vfpcc <8 x half> @fma_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
724 ; CHECK-LABEL: fma_v8f16_x:
725 ; CHECK: @ %bb.0: @ %entry
726 ; CHECK-NEXT: vctp.16 r0
728 ; CHECK-NEXT: vfmat.f16 q0, q1, q2
731 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
732 %m = fmul fast <8 x half> %y, %z
733 %a = select <8 x i1> %c, <8 x half> %m, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
734 %b = fadd fast <8 x half> %a, %x
; Select-after-op pattern: the operation is computed on all lanes and the
; select then falls back to %x where the predicate is false. Despite the
; icmp_slt name, the IR uses the @llvm.smin intrinsic rather than an
; icmp/select pair. The expected assembly is a vctp.32 followed by a
; predicated vmint.s32.
738 define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
739 ; CHECK-LABEL: icmp_slt_v4i32_x:
740 ; CHECK: @ %bb.0: @ %entry
741 ; CHECK-NEXT: vctp.32 r0
743 ; CHECK-NEXT: vmint.s32 q0, q0, q1
746 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
747 %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
748 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
752 define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
753 ; CHECK-LABEL: icmp_slt_v8i16_x:
754 ; CHECK: @ %bb.0: @ %entry
755 ; CHECK-NEXT: vctp.16 r0
757 ; CHECK-NEXT: vmint.s16 q0, q0, q1
760 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
761 %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
762 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
766 define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
767 ; CHECK-LABEL: icmp_slt_v16i8_x:
768 ; CHECK: @ %bb.0: @ %entry
769 ; CHECK-NEXT: vctp.8 r0
771 ; CHECK-NEXT: vmint.s8 q0, q0, q1
774 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
775 %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
776 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
780 define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
781 ; CHECK-LABEL: icmp_sgt_v4i32_x:
782 ; CHECK: @ %bb.0: @ %entry
783 ; CHECK-NEXT: vctp.32 r0
785 ; CHECK-NEXT: vmaxt.s32 q0, q0, q1
788 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
789 %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
790 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
794 define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
795 ; CHECK-LABEL: icmp_sgt_v8i16_x:
796 ; CHECK: @ %bb.0: @ %entry
797 ; CHECK-NEXT: vctp.16 r0
799 ; CHECK-NEXT: vmaxt.s16 q0, q0, q1
802 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
803 %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
804 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
808 define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
809 ; CHECK-LABEL: icmp_sgt_v16i8_x:
810 ; CHECK: @ %bb.0: @ %entry
811 ; CHECK-NEXT: vctp.8 r0
813 ; CHECK-NEXT: vmaxt.s8 q0, q0, q1
816 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
817 %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
818 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
822 define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
823 ; CHECK-LABEL: icmp_ult_v4i32_x:
824 ; CHECK: @ %bb.0: @ %entry
825 ; CHECK-NEXT: vctp.32 r0
827 ; CHECK-NEXT: vmint.u32 q0, q0, q1
830 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
831 %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
832 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
836 define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
837 ; CHECK-LABEL: icmp_ult_v8i16_x:
838 ; CHECK: @ %bb.0: @ %entry
839 ; CHECK-NEXT: vctp.16 r0
841 ; CHECK-NEXT: vmint.u16 q0, q0, q1
844 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
845 %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
846 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
850 define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
851 ; CHECK-LABEL: icmp_ult_v16i8_x:
852 ; CHECK: @ %bb.0: @ %entry
853 ; CHECK-NEXT: vctp.8 r0
855 ; CHECK-NEXT: vmint.u8 q0, q0, q1
858 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
859 %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
860 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
864 define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
865 ; CHECK-LABEL: icmp_ugt_v4i32_x:
866 ; CHECK: @ %bb.0: @ %entry
867 ; CHECK-NEXT: vctp.32 r0
869 ; CHECK-NEXT: vmaxt.u32 q0, q0, q1
872 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
873 %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
874 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
878 define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
879 ; CHECK-LABEL: icmp_ugt_v8i16_x:
880 ; CHECK: @ %bb.0: @ %entry
881 ; CHECK-NEXT: vctp.16 r0
883 ; CHECK-NEXT: vmaxt.u16 q0, q0, q1
886 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
887 %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
888 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
892 define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
893 ; CHECK-LABEL: icmp_ugt_v16i8_x:
894 ; CHECK: @ %bb.0: @ %entry
895 ; CHECK-NEXT: vctp.8 r0
897 ; CHECK-NEXT: vmaxt.u8 q0, q0, q1
900 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
901 %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
902 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Floating-point minimum spelled as a fast fcmp olt plus select, followed by
; a second select that falls back to %x where the vctp predicate is false.
; The expected assembly recognises the pair of selects as a single predicated
; vminnmt.f32 after the vctp.32.
906 define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
907 ; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
908 ; CHECK: @ %bb.0: @ %entry
909 ; CHECK-NEXT: vctp.32 r0
911 ; CHECK-NEXT: vminnmt.f32 q0, q0, q1
914 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
915 %a1 = fcmp fast olt <4 x float> %x, %y
916 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
917 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
921 define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
922 ; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
923 ; CHECK: @ %bb.0: @ %entry
924 ; CHECK-NEXT: vctp.16 r0
926 ; CHECK-NEXT: vminnmt.f16 q0, q0, q1
929 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
930 %a1 = fcmp fast olt <8 x half> %x, %y
931 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
932 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
936 define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
937 ; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
938 ; CHECK: @ %bb.0: @ %entry
939 ; CHECK-NEXT: vctp.32 r0
941 ; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1
944 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
945 %a1 = fcmp fast ogt <4 x float> %x, %y
946 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
947 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
951 define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
952 ; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
953 ; CHECK: @ %bb.0: @ %entry
954 ; CHECK-NEXT: vctp.16 r0
956 ; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1
959 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
960 %a1 = fcmp fast ogt <8 x half> %x, %y
961 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
962 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
; Signed saturating add via @llvm.sadd.sat, using the select-after-op
; pattern: lanes where the predicate is false return %x. The expected
; assembly is a vctp.32 followed by a predicated vqaddt.s32.
966 define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
967 ; CHECK-LABEL: sadd_sat_v4i32_x:
968 ; CHECK: @ %bb.0: @ %entry
969 ; CHECK-NEXT: vctp.32 r0
971 ; CHECK-NEXT: vqaddt.s32 q0, q0, q1
974 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
975 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
976 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
980 define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
981 ; CHECK-LABEL: sadd_sat_v8i16_x:
982 ; CHECK: @ %bb.0: @ %entry
983 ; CHECK-NEXT: vctp.16 r0
985 ; CHECK-NEXT: vqaddt.s16 q0, q0, q1
988 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
989 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
990 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
994 define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
995 ; CHECK-LABEL: sadd_sat_v16i8_x:
996 ; CHECK: @ %bb.0: @ %entry
997 ; CHECK-NEXT: vctp.8 r0
999 ; CHECK-NEXT: vqaddt.s8 q0, q0, q1
1002 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1003 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
1004 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1008 define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1009 ; CHECK-LABEL: uadd_sat_v4i32_x:
1010 ; CHECK: @ %bb.0: @ %entry
1011 ; CHECK-NEXT: vctp.32 r0
1013 ; CHECK-NEXT: vqaddt.u32 q0, q0, q1
1016 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1017 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
1018 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1022 define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1023 ; CHECK-LABEL: uadd_sat_v8i16_x:
1024 ; CHECK: @ %bb.0: @ %entry
1025 ; CHECK-NEXT: vctp.16 r0
1027 ; CHECK-NEXT: vqaddt.u16 q0, q0, q1
1030 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1031 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
1032 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1036 define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1037 ; CHECK-LABEL: uadd_sat_v16i8_x:
1038 ; CHECK: @ %bb.0: @ %entry
1039 ; CHECK-NEXT: vctp.8 r0
1041 ; CHECK-NEXT: vqaddt.u8 q0, q0, q1
1044 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1045 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
1046 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1050 define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1051 ; CHECK-LABEL: ssub_sat_v4i32_x:
1052 ; CHECK: @ %bb.0: @ %entry
1053 ; CHECK-NEXT: vctp.32 r0
1055 ; CHECK-NEXT: vqsubt.s32 q0, q0, q1
1058 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1059 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
1060 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Signed saturating subtract (@llvm.ssub.sat.v8i16) of %y from %x; lanes where
; the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r0 and vqsubt.s16 q0, q0, q1.
1064 define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1065 ; CHECK-LABEL: ssub_sat_v8i16_x:
1066 ; CHECK: @ %bb.0: @ %entry
1067 ; CHECK-NEXT: vctp.16 r0
1069 ; CHECK-NEXT: vqsubt.s16 q0, q0, q1
1072 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1073 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
1074 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Signed saturating subtract (@llvm.ssub.sat.v16i8) of %y from %x; lanes where
; the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r0 and vqsubt.s8 q0, q0, q1.
1078 define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1079 ; CHECK-LABEL: ssub_sat_v16i8_x:
1080 ; CHECK: @ %bb.0: @ %entry
1081 ; CHECK-NEXT: vctp.8 r0
1083 ; CHECK-NEXT: vqsubt.s8 q0, q0, q1
1086 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1087 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
1088 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Unsigned saturating subtract (@llvm.usub.sat.v4i32) of %y from %x; lanes where
; the @llvm.arm.mve.vctp32(%n) predicate is false keep %x.
; The assertions expect vctp.32 r0 and vqsubt.u32 q0, q0, q1.
1092 define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1093 ; CHECK-LABEL: usub_sat_v4i32_x:
1094 ; CHECK: @ %bb.0: @ %entry
1095 ; CHECK-NEXT: vctp.32 r0
1097 ; CHECK-NEXT: vqsubt.u32 q0, q0, q1
1100 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1101 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
1102 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Unsigned saturating subtract (@llvm.usub.sat.v8i16) of %y from %x; lanes where
; the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r0 and vqsubt.u16 q0, q0, q1.
1106 define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1107 ; CHECK-LABEL: usub_sat_v8i16_x:
1108 ; CHECK: @ %bb.0: @ %entry
1109 ; CHECK-NEXT: vctp.16 r0
1111 ; CHECK-NEXT: vqsubt.u16 q0, q0, q1
1114 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1115 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
1116 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Unsigned saturating subtract (@llvm.usub.sat.v16i8) of %y from %x; lanes where
; the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r0 and vqsubt.u8 q0, q0, q1.
1120 define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1121 ; CHECK-LABEL: usub_sat_v16i8_x:
1122 ; CHECK: @ %bb.0: @ %entry
1123 ; CHECK-NEXT: vctp.8 r0
1125 ; CHECK-NEXT: vqsubt.u8 q0, q0, q1
1128 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1129 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
1130 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Adds a splat of scalar %y to %x. Lanes where the @llvm.arm.mve.vctp32(%n)
; predicate is false see 0 instead of the splat, so they yield %x unchanged.
; The assertions expect vctp.32 r1 and vaddt.i32 q0, q0, r0.
1134 define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1135 ; CHECK-LABEL: addqr_v4i32_x:
1136 ; CHECK: @ %bb.0: @ %entry
1137 ; CHECK-NEXT: vctp.32 r1
1139 ; CHECK-NEXT: vaddt.i32 q0, q0, r0
1142 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1143 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1144 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1145 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
1146 %b = add <4 x i32> %a, %x
; Adds a splat of scalar %y to %x. Lanes where the @llvm.arm.mve.vctp16(%n)
; predicate is false see 0 instead of the splat, so they yield %x unchanged.
; The assertions expect vctp.16 r1 and vaddt.i16 q0, q0, r0.
1150 define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1151 ; CHECK-LABEL: addqr_v8i16_x:
1152 ; CHECK: @ %bb.0: @ %entry
1153 ; CHECK-NEXT: vctp.16 r1
1155 ; CHECK-NEXT: vaddt.i16 q0, q0, r0
1158 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1159 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1160 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1161 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
1162 %b = add <8 x i16> %a, %x
; Adds a splat of scalar %y to %x. Lanes where the @llvm.arm.mve.vctp8(%n)
; predicate is false see 0 instead of the splat, so they yield %x unchanged.
; The assertions expect vctp.8 r1 and vaddt.i8 q0, q0, r0.
1166 define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1167 ; CHECK-LABEL: addqr_v16i8_x:
1168 ; CHECK: @ %bb.0: @ %entry
1169 ; CHECK-NEXT: vctp.8 r1
1171 ; CHECK-NEXT: vaddt.i8 q0, q0, r0
1174 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1175 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1176 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1177 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
1178 %b = add <16 x i8> %a, %x
; Subtracts a splat of scalar %y from %x. Lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false subtract 0, yielding %x unchanged.
; The assertions expect vctp.32 r1 and vsubt.i32 q0, q0, r0.
1182 define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1183 ; CHECK-LABEL: subqr_v4i32_x:
1184 ; CHECK: @ %bb.0: @ %entry
1185 ; CHECK-NEXT: vctp.32 r1
1187 ; CHECK-NEXT: vsubt.i32 q0, q0, r0
1190 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1191 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1192 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1193 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
1194 %b = sub <4 x i32> %x, %a
; Subtracts a splat of scalar %y from %x. Lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false subtract 0, yielding %x unchanged.
; The assertions expect vctp.16 r1 and vsubt.i16 q0, q0, r0.
1198 define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1199 ; CHECK-LABEL: subqr_v8i16_x:
1200 ; CHECK: @ %bb.0: @ %entry
1201 ; CHECK-NEXT: vctp.16 r1
1203 ; CHECK-NEXT: vsubt.i16 q0, q0, r0
1206 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1207 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1208 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1209 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
1210 %b = sub <8 x i16> %x, %a
; Subtracts a splat of scalar %y from %x. Lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false subtract 0, yielding %x unchanged.
; The assertions expect vctp.8 r1 and vsubt.i8 q0, q0, r0.
1214 define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1215 ; CHECK-LABEL: subqr_v16i8_x:
1216 ; CHECK: @ %bb.0: @ %entry
1217 ; CHECK-NEXT: vctp.8 r1
1219 ; CHECK-NEXT: vsubt.i8 q0, q0, r0
1222 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1223 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1224 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1225 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
1226 %b = sub <16 x i8> %x, %a
; Multiplies %x by a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false multiply by 1, yielding %x.
; The assertions expect vctp.32 r1 and vmult.i32 q0, q0, r0.
1230 define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1231 ; CHECK-LABEL: mulqr_v4i32_x:
1232 ; CHECK: @ %bb.0: @ %entry
1233 ; CHECK-NEXT: vctp.32 r1
1235 ; CHECK-NEXT: vmult.i32 q0, q0, r0
1238 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1239 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1240 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1241 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1242 %b = mul <4 x i32> %a, %x
; Multiplies %x by a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false multiply by 1, yielding %x.
; The assertions expect vctp.16 r1 and vmult.i16 q0, q0, r0.
1246 define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1247 ; CHECK-LABEL: mulqr_v8i16_x:
1248 ; CHECK: @ %bb.0: @ %entry
1249 ; CHECK-NEXT: vctp.16 r1
1251 ; CHECK-NEXT: vmult.i16 q0, q0, r0
1254 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1255 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1256 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1257 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1258 %b = mul <8 x i16> %a, %x
; Multiplies %x by a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false multiply by 1, yielding %x.
; The assertions expect vctp.8 r1 and vmult.i8 q0, q0, r0.
1262 define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1263 ; CHECK-LABEL: mulqr_v16i8_x:
1264 ; CHECK: @ %bb.0: @ %entry
1265 ; CHECK-NEXT: vctp.8 r1
1267 ; CHECK-NEXT: vmult.i8 q0, q0, r0
1270 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1271 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1272 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1273 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1274 %b = mul <16 x i8> %a, %x
; Floating-point add of %x and a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false add -0.0 instead of the splat.
; The assertions expect vmov r1, s4, vctp.32 r0 and vaddt.f32 q0, q0, r1.
1278 define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1279 ; CHECK-LABEL: faddqr_v4f32_x:
1280 ; CHECK: @ %bb.0: @ %entry
1281 ; CHECK-NEXT: vmov r1, s4
1282 ; CHECK-NEXT: vctp.32 r0
1284 ; CHECK-NEXT: vaddt.f32 q0, q0, r1
1287 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1288 %i = insertelement <4 x float> undef, float %y, i64 0
1289 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1290 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
1291 %b = fadd <4 x float> %a, %x
; Half-precision add of %x and a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false add 0xH8000 (-0.0) instead.
; The assertions expect vmov.f16 r1, s4, vctp.16 r0 and vaddt.f16 q0, q0, r1.
1295 define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1296 ; CHECK-LABEL: faddqr_v8f16_x:
1297 ; CHECK: @ %bb.0: @ %entry
1298 ; CHECK-NEXT: vmov.f16 r1, s4
1299 ; CHECK-NEXT: vctp.16 r0
1301 ; CHECK-NEXT: vaddt.f16 q0, q0, r1
1304 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1305 %i = insertelement <8 x half> undef, half %y, i64 0
1306 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1307 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
1308 %b = fadd <8 x half> %a, %x
; Floating-point subtract of a splat of scalar %y from %x. Lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false subtract zeroinitializer (+0.0).
; The assertions expect vmov r1, s4, vctp.32 r0 and vsubt.f32 q0, q0, r1.
1312 define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1313 ; CHECK-LABEL: fsubqr_v4f32_x:
1314 ; CHECK: @ %bb.0: @ %entry
1315 ; CHECK-NEXT: vmov r1, s4
1316 ; CHECK-NEXT: vctp.32 r0
1318 ; CHECK-NEXT: vsubt.f32 q0, q0, r1
1321 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1322 %i = insertelement <4 x float> undef, float %y, i64 0
1323 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1324 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> zeroinitializer
1325 %b = fsub <4 x float> %x, %a
; Half-precision subtract of a splat of scalar %y from %x. Lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false subtract zeroinitializer (+0.0).
; The assertions expect vmov.f16 r1, s4, vctp.16 r0 and vsubt.f16 q0, q0, r1.
1329 define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1330 ; CHECK-LABEL: fsubqr_v8f16_x:
1331 ; CHECK: @ %bb.0: @ %entry
1332 ; CHECK-NEXT: vmov.f16 r1, s4
1333 ; CHECK-NEXT: vctp.16 r0
1335 ; CHECK-NEXT: vsubt.f16 q0, q0, r1
1338 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1339 %i = insertelement <8 x half> undef, half %y, i64 0
1340 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1341 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> zeroinitializer
1342 %b = fsub <8 x half> %x, %a
; Floating-point multiply of %x by a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false multiply by 1.0 instead.
; The assertions expect vmov r1, s4, vctp.32 r0 and vmult.f32 q0, q0, r1.
1346 define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1347 ; CHECK-LABEL: fmulqr_v4f32_x:
1348 ; CHECK: @ %bb.0: @ %entry
1349 ; CHECK-NEXT: vmov r1, s4
1350 ; CHECK-NEXT: vctp.32 r0
1352 ; CHECK-NEXT: vmult.f32 q0, q0, r1
1355 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1356 %i = insertelement <4 x float> undef, float %y, i64 0
1357 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1358 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1359 %b = fmul <4 x float> %a, %x
; Half-precision multiply of %x by a splat of scalar %y. Lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false multiply by 0xH3C00 (1.0) instead.
; The assertions expect vmov.f16 r1, s4, vctp.16 r0 and vmult.f16 q0, q0, r1.
1363 define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1364 ; CHECK-LABEL: fmulqr_v8f16_x:
1365 ; CHECK: @ %bb.0: @ %entry
1366 ; CHECK-NEXT: vmov.f16 r1, s4
1367 ; CHECK-NEXT: vctp.16 r0
1369 ; CHECK-NEXT: vmult.f16 q0, q0, r1
1372 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1373 %i = insertelement <8 x half> undef, half %y, i64 0
1374 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1375 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
1376 %b = fmul <8 x half> %a, %x
; Signed saturating add (@llvm.sadd.sat.v4i32) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp32(%n) predicate is false keep %x.
; The assertions expect vctp.32 r1 and vqaddt.s32 q0, q0, r0.
1380 define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1381 ; CHECK-LABEL: sadd_satqr_v4i32_x:
1382 ; CHECK: @ %bb.0: @ %entry
1383 ; CHECK-NEXT: vctp.32 r1
1385 ; CHECK-NEXT: vqaddt.s32 q0, q0, r0
1388 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1389 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1390 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1391 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1392 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Signed saturating add (@llvm.sadd.sat.v8i16) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r1 and vqaddt.s16 q0, q0, r0.
1396 define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1397 ; CHECK-LABEL: sadd_satqr_v8i16_x:
1398 ; CHECK: @ %bb.0: @ %entry
1399 ; CHECK-NEXT: vctp.16 r1
1401 ; CHECK-NEXT: vqaddt.s16 q0, q0, r0
1404 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1405 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1406 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1407 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1408 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Signed saturating add (@llvm.sadd.sat.v16i8) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r1 and vqaddt.s8 q0, q0, r0.
1412 define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1413 ; CHECK-LABEL: sadd_satqr_v16i8_x:
1414 ; CHECK: @ %bb.0: @ %entry
1415 ; CHECK-NEXT: vctp.8 r1
1417 ; CHECK-NEXT: vqaddt.s8 q0, q0, r0
1420 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1421 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1422 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1423 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1424 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Unsigned saturating add (@llvm.uadd.sat.v4i32) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp32(%n) predicate is false keep %x.
; The assertions expect vctp.32 r1 and vqaddt.u32 q0, q0, r0.
1428 define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1429 ; CHECK-LABEL: uadd_satqr_v4i32_x:
1430 ; CHECK: @ %bb.0: @ %entry
1431 ; CHECK-NEXT: vctp.32 r1
1433 ; CHECK-NEXT: vqaddt.u32 q0, q0, r0
1436 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1437 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1438 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1439 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1440 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Unsigned saturating add (@llvm.uadd.sat.v8i16) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r1 and vqaddt.u16 q0, q0, r0.
1444 define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1445 ; CHECK-LABEL: uadd_satqr_v8i16_x:
1446 ; CHECK: @ %bb.0: @ %entry
1447 ; CHECK-NEXT: vctp.16 r1
1449 ; CHECK-NEXT: vqaddt.u16 q0, q0, r0
1452 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1453 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1454 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1455 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1456 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Unsigned saturating add (@llvm.uadd.sat.v16i8) of %x and a splat of scalar %y;
; lanes where the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r1 and vqaddt.u8 q0, q0, r0.
1460 define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1461 ; CHECK-LABEL: uadd_satqr_v16i8_x:
1462 ; CHECK: @ %bb.0: @ %entry
1463 ; CHECK-NEXT: vctp.8 r1
1465 ; CHECK-NEXT: vqaddt.u8 q0, q0, r0
1468 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1469 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1470 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1471 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1472 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Signed saturating subtract (@llvm.ssub.sat.v4i32) of a splat of scalar %y from
; %x; lanes where the @llvm.arm.mve.vctp32(%n) predicate is false keep %x.
; The assertions expect vctp.32 r1 and vqsubt.s32 q0, q0, r0.
1476 define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1477 ; CHECK-LABEL: ssub_satqr_v4i32_x:
1478 ; CHECK: @ %bb.0: @ %entry
1479 ; CHECK-NEXT: vctp.32 r1
1481 ; CHECK-NEXT: vqsubt.s32 q0, q0, r0
1484 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1485 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1486 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1487 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1488 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Signed saturating subtract (@llvm.ssub.sat.v8i16) of a splat of scalar %y from
; %x; lanes where the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r1 and vqsubt.s16 q0, q0, r0.
1492 define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1493 ; CHECK-LABEL: ssub_satqr_v8i16_x:
1494 ; CHECK: @ %bb.0: @ %entry
1495 ; CHECK-NEXT: vctp.16 r1
1497 ; CHECK-NEXT: vqsubt.s16 q0, q0, r0
1500 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1501 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1502 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1503 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1504 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Signed saturating subtract (@llvm.ssub.sat.v16i8) of a splat of scalar %y from
; %x; lanes where the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r1 and vqsubt.s8 q0, q0, r0.
1508 define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1509 ; CHECK-LABEL: ssub_satqr_v16i8_x:
1510 ; CHECK: @ %bb.0: @ %entry
1511 ; CHECK-NEXT: vctp.8 r1
1513 ; CHECK-NEXT: vqsubt.s8 q0, q0, r0
1516 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1517 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1518 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1519 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1520 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Unsigned saturating subtract (@llvm.usub.sat.v4i32) of a splat of scalar %y
; from %x; lanes where the @llvm.arm.mve.vctp32(%n) predicate is false keep %x.
; The assertions expect vctp.32 r1 and vqsubt.u32 q0, q0, r0.
1524 define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1525 ; CHECK-LABEL: usub_satqr_v4i32_x:
1526 ; CHECK: @ %bb.0: @ %entry
1527 ; CHECK-NEXT: vctp.32 r1
1529 ; CHECK-NEXT: vqsubt.u32 q0, q0, r0
1532 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1533 %i = insertelement <4 x i32> undef, i32 %y, i64 0
1534 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1535 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1536 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; Unsigned saturating subtract (@llvm.usub.sat.v8i16) of a splat of scalar %y
; from %x; lanes where the @llvm.arm.mve.vctp16(%n) predicate is false keep %x.
; The assertions expect vctp.16 r1 and vqsubt.u16 q0, q0, r0.
1540 define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1541 ; CHECK-LABEL: usub_satqr_v8i16_x:
1542 ; CHECK: @ %bb.0: @ %entry
1543 ; CHECK-NEXT: vctp.16 r1
1545 ; CHECK-NEXT: vqsubt.u16 q0, q0, r0
1548 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1549 %i = insertelement <8 x i16> undef, i16 %y, i64 0
1550 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1551 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1552 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; Unsigned saturating subtract (@llvm.usub.sat.v16i8) of a splat of scalar %y
; from %x; lanes where the @llvm.arm.mve.vctp8(%n) predicate is false keep %x.
; The assertions expect vctp.8 r1 and vqsubt.u8 q0, q0, r0.
1556 define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1557 ; CHECK-LABEL: usub_satqr_v16i8_x:
1558 ; CHECK: @ %bb.0: @ %entry
1559 ; CHECK-NEXT: vctp.8 r1
1561 ; CHECK-NEXT: vqsubt.u8 q0, q0, r0
1564 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1565 %i = insertelement <16 x i8> undef, i8 %y, i64 0
1566 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1567 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1568 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; Adds %x to %y, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.32 r0, vaddt.i32 q1, q1, q0 and vmov q0, q1.
1572 define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1573 ; CHECK-LABEL: add_v4i32_y:
1574 ; CHECK: @ %bb.0: @ %entry
1575 ; CHECK-NEXT: vctp.32 r0
1577 ; CHECK-NEXT: vaddt.i32 q1, q1, q0
1578 ; CHECK-NEXT: vmov q0, q1
1581 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1582 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1583 %b = add <4 x i32> %a, %y
; Adds %x to %y, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.16 r0, vaddt.i16 q1, q1, q0 and vmov q0, q1.
1587 define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1588 ; CHECK-LABEL: add_v8i16_y:
1589 ; CHECK: @ %bb.0: @ %entry
1590 ; CHECK-NEXT: vctp.16 r0
1592 ; CHECK-NEXT: vaddt.i16 q1, q1, q0
1593 ; CHECK-NEXT: vmov q0, q1
1596 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1597 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1598 %b = add <8 x i16> %a, %y
; Adds %x to %y, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.8 r0, vaddt.i8 q1, q1, q0 and vmov q0, q1.
1602 define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1603 ; CHECK-LABEL: add_v16i8_y:
1604 ; CHECK: @ %bb.0: @ %entry
1605 ; CHECK-NEXT: vctp.8 r0
1607 ; CHECK-NEXT: vaddt.i8 q1, q1, q0
1608 ; CHECK-NEXT: vmov q0, q1
1611 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1612 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1613 %b = add <16 x i8> %a, %y
; Computes %x - %y; lanes where the @llvm.arm.mve.vctp32(%n) predicate is false
; take %y instead of the difference.
; The assertions expect vctp.32 r0, vsubt.i32 q1, q0, q1 and vmov q0, q1.
1617 define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1618 ; CHECK-LABEL: sub_v4i32_y:
1619 ; CHECK: @ %bb.0: @ %entry
1620 ; CHECK-NEXT: vctp.32 r0
1622 ; CHECK-NEXT: vsubt.i32 q1, q0, q1
1623 ; CHECK-NEXT: vmov q0, q1
1626 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1627 %a = sub <4 x i32> %x, %y
1628 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Computes %x - %y; lanes where the @llvm.arm.mve.vctp16(%n) predicate is false
; take %y instead of the difference.
; The assertions expect vctp.16 r0, vsubt.i16 q1, q0, q1 and vmov q0, q1.
1632 define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1633 ; CHECK-LABEL: sub_v8i16_y:
1634 ; CHECK: @ %bb.0: @ %entry
1635 ; CHECK-NEXT: vctp.16 r0
1637 ; CHECK-NEXT: vsubt.i16 q1, q0, q1
1638 ; CHECK-NEXT: vmov q0, q1
1641 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1642 %a = sub <8 x i16> %x, %y
1643 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Computes %x - %y; lanes where the @llvm.arm.mve.vctp8(%n) predicate is false
; take %y instead of the difference.
; The assertions expect vctp.8 r0, vsubt.i8 q1, q0, q1 and vmov q0, q1.
1647 define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1648 ; CHECK-LABEL: sub_v16i8_y:
1649 ; CHECK: @ %bb.0: @ %entry
1650 ; CHECK-NEXT: vctp.8 r0
1652 ; CHECK-NEXT: vsubt.i8 q1, q0, q1
1653 ; CHECK-NEXT: vmov q0, q1
1656 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1657 %a = sub <16 x i8> %x, %y
1658 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Multiplies %y by %x, with %x replaced by 1 in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.32 r0, vmult.i32 q1, q1, q0 and vmov q0, q1.
1662 define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1663 ; CHECK-LABEL: mul_v4i32_y:
1664 ; CHECK: @ %bb.0: @ %entry
1665 ; CHECK-NEXT: vctp.32 r0
1667 ; CHECK-NEXT: vmult.i32 q1, q1, q0
1668 ; CHECK-NEXT: vmov q0, q1
1671 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1672 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1673 %b = mul <4 x i32> %a, %y
; Multiplies %y by %x, with %x replaced by 1 in lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.16 r0, vmult.i16 q1, q1, q0 and vmov q0, q1.
1677 define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1678 ; CHECK-LABEL: mul_v8i16_y:
1679 ; CHECK: @ %bb.0: @ %entry
1680 ; CHECK-NEXT: vctp.16 r0
1682 ; CHECK-NEXT: vmult.i16 q1, q1, q0
1683 ; CHECK-NEXT: vmov q0, q1
1686 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1687 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1688 %b = mul <8 x i16> %a, %y
; Multiplies %y by %x, with %x replaced by 1 in lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.8 r0, vmult.i8 q1, q1, q0 and vmov q0, q1.
1692 define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1693 ; CHECK-LABEL: mul_v16i8_y:
1694 ; CHECK: @ %bb.0: @ %entry
1695 ; CHECK-NEXT: vctp.8 r0
1697 ; CHECK-NEXT: vmult.i8 q1, q1, q0
1698 ; CHECK-NEXT: vmov q0, q1
1701 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1702 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1703 %b = mul <16 x i8> %a, %y
; Bitwise AND of %y with %x, with %x replaced by all-ones in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.32 r0, vandt q1, q1, q0 and vmov q0, q1.
1707 define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1708 ; CHECK-LABEL: and_v4i32_y:
1709 ; CHECK: @ %bb.0: @ %entry
1710 ; CHECK-NEXT: vctp.32 r0
1712 ; CHECK-NEXT: vandt q1, q1, q0
1713 ; CHECK-NEXT: vmov q0, q1
1716 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1717 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1718 %b = and <4 x i32> %a, %y
; Bitwise AND of %y with %x, with %x replaced by all-ones in lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.16 r0, vandt q1, q1, q0 and vmov q0, q1.
1722 define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1723 ; CHECK-LABEL: and_v8i16_y:
1724 ; CHECK: @ %bb.0: @ %entry
1725 ; CHECK-NEXT: vctp.16 r0
1727 ; CHECK-NEXT: vandt q1, q1, q0
1728 ; CHECK-NEXT: vmov q0, q1
1731 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1732 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1733 %b = and <8 x i16> %a, %y
; Bitwise AND of %y with %x, with %x replaced by all-ones in lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.8 r0, vandt q1, q1, q0 and vmov q0, q1.
1737 define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1738 ; CHECK-LABEL: and_v16i8_y:
1739 ; CHECK: @ %bb.0: @ %entry
1740 ; CHECK-NEXT: vctp.8 r0
1742 ; CHECK-NEXT: vandt q1, q1, q0
1743 ; CHECK-NEXT: vmov q0, q1
1746 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1747 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1748 %b = and <16 x i8> %a, %y
; Bitwise OR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.32 r0, vorrt q1, q1, q0 and vmov q0, q1.
1752 define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1753 ; CHECK-LABEL: or_v4i32_y:
1754 ; CHECK: @ %bb.0: @ %entry
1755 ; CHECK-NEXT: vctp.32 r0
1757 ; CHECK-NEXT: vorrt q1, q1, q0
1758 ; CHECK-NEXT: vmov q0, q1
1761 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1762 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1763 %b = or <4 x i32> %a, %y
; Bitwise OR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.16 r0, vorrt q1, q1, q0 and vmov q0, q1.
1767 define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1768 ; CHECK-LABEL: or_v8i16_y:
1769 ; CHECK: @ %bb.0: @ %entry
1770 ; CHECK-NEXT: vctp.16 r0
1772 ; CHECK-NEXT: vorrt q1, q1, q0
1773 ; CHECK-NEXT: vmov q0, q1
1776 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1777 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1778 %b = or <8 x i16> %a, %y
; Bitwise OR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.8 r0, vorrt q1, q1, q0 and vmov q0, q1.
1782 define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1783 ; CHECK-LABEL: or_v16i8_y:
1784 ; CHECK: @ %bb.0: @ %entry
1785 ; CHECK-NEXT: vctp.8 r0
1787 ; CHECK-NEXT: vorrt q1, q1, q0
1788 ; CHECK-NEXT: vmov q0, q1
1791 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1792 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1793 %b = or <16 x i8> %a, %y
; Bitwise XOR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.32 r0, veort q1, q1, q0 and vmov q0, q1.
1797 define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1798 ; CHECK-LABEL: xor_v4i32_y:
1799 ; CHECK: @ %bb.0: @ %entry
1800 ; CHECK-NEXT: vctp.32 r0
1802 ; CHECK-NEXT: veort q1, q1, q0
1803 ; CHECK-NEXT: vmov q0, q1
1806 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1807 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1808 %b = xor <4 x i32> %a, %y
; Bitwise XOR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp16(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.16 r0, veort q1, q1, q0 and vmov q0, q1.
1812 define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1813 ; CHECK-LABEL: xor_v8i16_y:
1814 ; CHECK: @ %bb.0: @ %entry
1815 ; CHECK-NEXT: vctp.16 r0
1817 ; CHECK-NEXT: veort q1, q1, q0
1818 ; CHECK-NEXT: vmov q0, q1
1821 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1822 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1823 %b = xor <8 x i16> %a, %y
; Bitwise XOR of %y with %x, with %x replaced by 0 in lanes where the
; @llvm.arm.mve.vctp8(%n) predicate is false, so those lanes yield %y.
; The assertions expect vctp.8 r0, veort q1, q1, q0 and vmov q0, q1.
1827 define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1828 ; CHECK-LABEL: xor_v16i8_y:
1829 ; CHECK: @ %bb.0: @ %entry
1830 ; CHECK-NEXT: vctp.8 r0
1832 ; CHECK-NEXT: veort q1, q1, q0
1833 ; CHECK-NEXT: vmov q0, q1
1836 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1837 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1838 %b = xor <16 x i8> %a, %y
; Shifts %x left by %y; lanes where the @llvm.arm.mve.vctp32(%n) predicate is
; false take %y instead of the shifted value.
; The assertions expect vctp.32 r0, vshlt.u32 q1, q0, q1 and vmov q0, q1.
1842 define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1843 ; CHECK-LABEL: shl_v4i32_y:
1844 ; CHECK: @ %bb.0: @ %entry
1845 ; CHECK-NEXT: vctp.32 r0
1847 ; CHECK-NEXT: vshlt.u32 q1, q0, q1
1848 ; CHECK-NEXT: vmov q0, q1
1851 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1852 %a = shl <4 x i32> %x, %y
1853 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Shifts %x left by %y; lanes where the @llvm.arm.mve.vctp16(%n) predicate is
; false take %y instead of the shifted value.
; The assertions expect vctp.16 r0, vshlt.u16 q1, q0, q1 and vmov q0, q1.
1857 define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1858 ; CHECK-LABEL: shl_v8i16_y:
1859 ; CHECK: @ %bb.0: @ %entry
1860 ; CHECK-NEXT: vctp.16 r0
1862 ; CHECK-NEXT: vshlt.u16 q1, q0, q1
1863 ; CHECK-NEXT: vmov q0, q1
1866 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1867 %a = shl <8 x i16> %x, %y
1868 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Shifts %x left by %y; lanes where the @llvm.arm.mve.vctp8(%n) predicate is
; false take %y instead of the shifted value.
; The assertions expect vctp.8 r0, vshlt.u8 q1, q0, q1 and vmov q0, q1.
1872 define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1873 ; CHECK-LABEL: shl_v16i8_y:
1874 ; CHECK: @ %bb.0: @ %entry
1875 ; CHECK-NEXT: vctp.8 r0
1877 ; CHECK-NEXT: vshlt.u8 q1, q0, q1
1878 ; CHECK-NEXT: vmov q0, q1
1881 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1882 %a = shl <16 x i8> %x, %y
1883 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Arithmetic shift right of %x by %y; lanes where the @llvm.arm.mve.vctp32(%n)
; predicate is false take %y instead. The assertions expect vneg.s32 q2, q1,
; vctp.32 r0, vshlt.s32 q1, q0, q2 and vmov q0, q1.
1887 define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1888 ; CHECK-LABEL: ashr_v4i32_y:
1889 ; CHECK: @ %bb.0: @ %entry
1890 ; CHECK-NEXT: vneg.s32 q2, q1
1891 ; CHECK-NEXT: vctp.32 r0
1893 ; CHECK-NEXT: vshlt.s32 q1, q0, q2
1894 ; CHECK-NEXT: vmov q0, q1
1897 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1898 %a = ashr <4 x i32> %x, %y
1899 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Arithmetic shift right of %x by %y; lanes where the @llvm.arm.mve.vctp16(%n)
; predicate is false take %y instead. The assertions expect vneg.s16 q2, q1,
; vctp.16 r0, vshlt.s16 q1, q0, q2 and vmov q0, q1.
1903 define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1904 ; CHECK-LABEL: ashr_v8i16_y:
1905 ; CHECK: @ %bb.0: @ %entry
1906 ; CHECK-NEXT: vneg.s16 q2, q1
1907 ; CHECK-NEXT: vctp.16 r0
1909 ; CHECK-NEXT: vshlt.s16 q1, q0, q2
1910 ; CHECK-NEXT: vmov q0, q1
1913 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1914 %a = ashr <8 x i16> %x, %y
1915 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Arithmetic shift right of %x by %y; lanes where the @llvm.arm.mve.vctp8(%n)
; predicate is false take %y instead. The assertions expect vneg.s8 q2, q1,
; vctp.8 r0, vshlt.s8 q1, q0, q2 and vmov q0, q1.
1919 define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1920 ; CHECK-LABEL: ashr_v16i8_y:
1921 ; CHECK: @ %bb.0: @ %entry
1922 ; CHECK-NEXT: vneg.s8 q2, q1
1923 ; CHECK-NEXT: vctp.8 r0
1925 ; CHECK-NEXT: vshlt.s8 q1, q0, q2
1926 ; CHECK-NEXT: vmov q0, q1
1929 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1930 %a = ashr <16 x i8> %x, %y
1931 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Logical shift right of %x by %y; lanes where the @llvm.arm.mve.vctp32(%n)
; predicate is false take %y instead. The assertions expect vneg.s32 q2, q1,
; vctp.32 r0, vshlt.u32 q1, q0, q2 and vmov q0, q1.
1935 define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1936 ; CHECK-LABEL: lshr_v4i32_y:
1937 ; CHECK: @ %bb.0: @ %entry
1938 ; CHECK-NEXT: vneg.s32 q2, q1
1939 ; CHECK-NEXT: vctp.32 r0
1941 ; CHECK-NEXT: vshlt.u32 q1, q0, q2
1942 ; CHECK-NEXT: vmov q0, q1
1945 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1946 %a = lshr <4 x i32> %x, %y
1947 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Logical shift right of %x by %y; lanes where the @llvm.arm.mve.vctp16(%n)
; predicate is false take %y instead. The assertions expect vneg.s16 q2, q1,
; vctp.16 r0, vshlt.u16 q1, q0, q2 and vmov q0, q1.
1951 define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1952 ; CHECK-LABEL: lshr_v8i16_y:
1953 ; CHECK: @ %bb.0: @ %entry
1954 ; CHECK-NEXT: vneg.s16 q2, q1
1955 ; CHECK-NEXT: vctp.16 r0
1957 ; CHECK-NEXT: vshlt.u16 q1, q0, q2
1958 ; CHECK-NEXT: vmov q0, q1
1961 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1962 %a = lshr <8 x i16> %x, %y
1963 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Logical shift right of %x by %y; lanes where the @llvm.arm.mve.vctp8(%n)
; predicate is false take %y instead. The assertions expect vneg.s8 q2, q1,
; vctp.8 r0, vshlt.u8 q1, q0, q2 and vmov q0, q1.
1967 define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1968 ; CHECK-LABEL: lshr_v16i8_y:
1969 ; CHECK: @ %bb.0: @ %entry
1970 ; CHECK-NEXT: vneg.s8 q2, q1
1971 ; CHECK-NEXT: vctp.8 r0
1973 ; CHECK-NEXT: vshlt.u8 q1, q0, q2
1974 ; CHECK-NEXT: vmov q0, q1
1977 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1978 %a = lshr <16 x i8> %x, %y
1979 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Computes %x AND (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp32(%n) predicate is false take %y instead.
; The assertions expect vctp.32 r0, vbict q1, q0, q1 and vmov q0, q1.
1983 define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1984 ; CHECK-LABEL: andnot_v4i32_y:
1985 ; CHECK: @ %bb.0: @ %entry
1986 ; CHECK-NEXT: vctp.32 r0
1988 ; CHECK-NEXT: vbict q1, q0, q1
1989 ; CHECK-NEXT: vmov q0, q1
1992 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1993 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1994 %a = and <4 x i32> %y1, %x
1995 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Computes %x AND (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp16(%n) predicate is false take %y instead.
; The assertions expect vctp.16 r0, vbict q1, q0, q1 and vmov q0, q1.
1999 define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2000 ; CHECK-LABEL: andnot_v8i16_y:
2001 ; CHECK: @ %bb.0: @ %entry
2002 ; CHECK-NEXT: vctp.16 r0
2004 ; CHECK-NEXT: vbict q1, q0, q1
2005 ; CHECK-NEXT: vmov q0, q1
2008 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2009 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
2010 %a = and <8 x i16> %y1, %x
2011 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Computes %x AND (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp8(%n) predicate is false take %y instead.
; The assertions expect vctp.8 r0, vbict q1, q0, q1 and vmov q0, q1.
2015 define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2016 ; CHECK-LABEL: andnot_v16i8_y:
2017 ; CHECK: @ %bb.0: @ %entry
2018 ; CHECK-NEXT: vctp.8 r0
2020 ; CHECK-NEXT: vbict q1, q0, q1
2021 ; CHECK-NEXT: vmov q0, q1
2024 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2025 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
2026 %a = and <16 x i8> %y1, %x
2027 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Computes %x OR (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp32(%n) predicate is false take %y instead.
; The assertions expect vctp.32 r0, vornt q1, q0, q1 and vmov q0, q1.
2031 define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2032 ; CHECK-LABEL: ornot_v4i32_y:
2033 ; CHECK: @ %bb.0: @ %entry
2034 ; CHECK-NEXT: vctp.32 r0
2036 ; CHECK-NEXT: vornt q1, q0, q1
2037 ; CHECK-NEXT: vmov q0, q1
2040 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2041 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
2042 %a = or <4 x i32> %y1, %x
2043 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Computes %x OR (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp16(%n) predicate is false take %y instead.
; The assertions expect vctp.16 r0, vornt q1, q0, q1 and vmov q0, q1.
2047 define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2048 ; CHECK-LABEL: ornot_v8i16_y:
2049 ; CHECK: @ %bb.0: @ %entry
2050 ; CHECK-NEXT: vctp.16 r0
2052 ; CHECK-NEXT: vornt q1, q0, q1
2053 ; CHECK-NEXT: vmov q0, q1
2056 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2057 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
2058 %a = or <8 x i16> %y1, %x
2059 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Computes %x OR (NOT %y), with NOT written as xor with all-ones; lanes where
; the @llvm.arm.mve.vctp8(%n) predicate is false take %y instead.
; The assertions expect vctp.8 r0, vornt q1, q0, q1 and vmov q0, q1.
2063 define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2064 ; CHECK-LABEL: ornot_v16i8_y:
2065 ; CHECK: @ %bb.0: @ %entry
2066 ; CHECK-NEXT: vctp.8 r0
2068 ; CHECK-NEXT: vornt q1, q0, q1
2069 ; CHECK-NEXT: vmov q0, q1
2072 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2073 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
2074 %a = or <16 x i8> %y1, %x
2075 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Floating-point add of %x and %y, with %x replaced by -0.0 in lanes where the
; @llvm.arm.mve.vctp32(%n) predicate is false.
; The assertions expect vctp.32 r0, vaddt.f32 q1, q1, q0 and vmov q0, q1.
2079 define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2080 ; CHECK-LABEL: fadd_v4f32_y:
2081 ; CHECK: @ %bb.0: @ %entry
2082 ; CHECK-NEXT: vctp.32 r0
2084 ; CHECK-NEXT: vaddt.f32 q1, q1, q0
2085 ; CHECK-NEXT: vmov q0, q1
2088 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2089 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
2090 %b = fadd <4 x float> %a, %y
; Half-precision add of %x and %y, with %x replaced by 0xH8000 (-0.0) in lanes
; where the @llvm.arm.mve.vctp16(%n) predicate is false.
; The assertions expect vctp.16 r0, vaddt.f16 q1, q1, q0 and vmov q0, q1.
2094 define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2095 ; CHECK-LABEL: fadd_v8f16_y:
2096 ; CHECK: @ %bb.0: @ %entry
2097 ; CHECK-NEXT: vctp.16 r0
2099 ; CHECK-NEXT: vaddt.f16 q1, q1, q0
2100 ; CHECK-NEXT: vmov q0, q1
2103 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2104 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
2105 %b = fadd <8 x half> %a, %y
; Predicated fsub, <4 x float>: lanes enabled by vctp32(%n) get %x - %y, the
; remaining lanes keep %y. Expected codegen includes a vsubt.f32 into q1, after
; which q1 is copied to q0.
2109 define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2110 ; CHECK-LABEL: fsub_v4f32_y:
2111 ; CHECK: @ %bb.0: @ %entry
2112 ; CHECK-NEXT: vctp.32 r0
2114 ; CHECK-NEXT: vsubt.f32 q1, q0, q1
2115 ; CHECK-NEXT: vmov q0, q1
2118 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2119 %a = fsub <4 x float> %x, %y
2120 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
; Predicated fsub, <8 x half>: lanes enabled by vctp16(%n) get %x - %y, the
; remaining lanes keep %y. Expected codegen includes a vsubt.f16 into q1, after
; which q1 is copied to q0.
2124 define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2125 ; CHECK-LABEL: fsub_v8f16_y:
2126 ; CHECK: @ %bb.0: @ %entry
2127 ; CHECK-NEXT: vctp.16 r0
2129 ; CHECK-NEXT: vsubt.f16 q1, q0, q1
2130 ; CHECK-NEXT: vmov q0, q1
2133 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2134 %a = fsub <8 x half> %x, %y
2135 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
; Predicated fmul, <4 x float>: %x is replaced by 1.0 (the fmul identity) in
; lanes disabled by vctp32(%n) before being multiplied by %y. Expected codegen
; includes a vmult.f32 into q1, after which q1 is copied to q0.
2139 define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2140 ; CHECK-LABEL: fmul_v4f32_y:
2141 ; CHECK: @ %bb.0: @ %entry
2142 ; CHECK-NEXT: vctp.32 r0
2144 ; CHECK-NEXT: vmult.f32 q1, q1, q0
2145 ; CHECK-NEXT: vmov q0, q1
2148 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2149 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
2150 %b = fmul <4 x float> %a, %y
; Predicated fmul, <8 x half>: %x is replaced by 0xH3C00 (half 1.0, the fmul
; identity) in lanes disabled by vctp16(%n) before being multiplied by %y.
; Expected codegen includes a vmult.f16 into q1, after which q1 is copied to q0.
2154 define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2155 ; CHECK-LABEL: fmul_v8f16_y:
2156 ; CHECK: @ %bb.0: @ %entry
2157 ; CHECK-NEXT: vctp.16 r0
2159 ; CHECK-NEXT: vmult.f16 q1, q1, q0
2160 ; CHECK-NEXT: vmov q0, q1
2163 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2164 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
2165 %b = fmul <8 x half> %a, %y
; Predicated fdiv, <4 x float>: lanes enabled by vctp32(%n) get %x / %y, the
; remaining lanes keep %y. Expected codegen divides lane by lane with scalar
; vdiv.f32 on s0-s3 / s4-s7 and then merges q0 into q1 with a vmovt.
2169 define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2170 ; CHECK-LABEL: fdiv_v4f32_y:
2171 ; CHECK: @ %bb.0: @ %entry
2172 ; CHECK-NEXT: vdiv.f32 s3, s3, s7
2173 ; CHECK-NEXT: vctp.32 r0
2174 ; CHECK-NEXT: vdiv.f32 s2, s2, s6
2175 ; CHECK-NEXT: vdiv.f32 s1, s1, s5
2176 ; CHECK-NEXT: vdiv.f32 s0, s0, s4
2178 ; CHECK-NEXT: vmovt q1, q0
2179 ; CHECK-NEXT: vmov q0, q1
2182 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2183 %a = fdiv <4 x float> %x, %y
2184 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
; Predicated fdiv, <8 x half>: lanes enabled by vctp16(%n) get %x / %y, the
; remaining lanes keep %y. Expected codegen is a scalar sequence of vmovx.f16,
; vdiv.f16 and vins.f16 over s0-s3 / s4-s7, followed by a vmovt that merges q0
; into q1.
2188 define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2189 ; CHECK-LABEL: fdiv_v8f16_y:
2190 ; CHECK: @ %bb.0: @ %entry
2191 ; CHECK-NEXT: vmovx.f16 s10, s0
2192 ; CHECK-NEXT: vmovx.f16 s8, s4
2193 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
2194 ; CHECK-NEXT: vdiv.f16 s0, s0, s4
2195 ; CHECK-NEXT: vins.f16 s0, s8
2196 ; CHECK-NEXT: vmovx.f16 s10, s1
2197 ; CHECK-NEXT: vmovx.f16 s8, s5
2198 ; CHECK-NEXT: vdiv.f16 s1, s1, s5
2199 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
2200 ; CHECK-NEXT: vmovx.f16 s10, s2
2201 ; CHECK-NEXT: vins.f16 s1, s8
2202 ; CHECK-NEXT: vmovx.f16 s8, s6
2203 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
2204 ; CHECK-NEXT: vdiv.f16 s2, s2, s6
2205 ; CHECK-NEXT: vins.f16 s2, s8
2206 ; CHECK-NEXT: vmovx.f16 s10, s3
2207 ; CHECK-NEXT: vmovx.f16 s8, s7
2208 ; CHECK-NEXT: vdiv.f16 s3, s3, s7
2209 ; CHECK-NEXT: vdiv.f16 s8, s10, s8
2210 ; CHECK-NEXT: vctp.16 r0
2211 ; CHECK-NEXT: vins.f16 s3, s8
2213 ; CHECK-NEXT: vmovt q1, q0
2214 ; CHECK-NEXT: vmov q0, q1
2217 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2218 %a = fdiv <8 x half> %x, %y
2219 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
; Predicated fused multiply-add via the llvm.fma intrinsic, <4 x float>: lanes
; enabled by vctp32(%n) get fma(%y, %z, %x), the remaining lanes keep %y.
; Expected codegen is a vfma.f32 into q0 that is then merged into q1 by vmovt.
2223 define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
2224 ; CHECK-LABEL: fmai_v4f32_y:
2225 ; CHECK: @ %bb.0: @ %entry
2226 ; CHECK-NEXT: vfma.f32 q0, q1, q2
2227 ; CHECK-NEXT: vctp.32 r0
2229 ; CHECK-NEXT: vmovt q1, q0
2230 ; CHECK-NEXT: vmov q0, q1
2233 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2234 %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
2235 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
; Predicated fused multiply-add via the llvm.fma intrinsic, <8 x half>: lanes
; enabled by vctp16(%n) get fma(%y, %z, %x), the remaining lanes keep %y.
; Expected codegen is a vfma.f16 into q0 that is then merged into q1 by vmovt.
2239 define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
2240 ; CHECK-LABEL: fmai_v8f16_y:
2241 ; CHECK: @ %bb.0: @ %entry
2242 ; CHECK-NEXT: vfma.f16 q0, q1, q2
2243 ; CHECK-NEXT: vctp.16 r0
2245 ; CHECK-NEXT: vmovt q1, q0
2246 ; CHECK-NEXT: vmov q0, q1
2249 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2250 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
2251 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
; Predicated multiply-add written as separate fast-math fmul and fadd,
; <4 x float>: lanes enabled by vctp32(%n) get %y * %z + %x, the remaining
; lanes keep %y. Expected codegen is a vfma.f32 into q0 that is then merged
; into q1 by vmovt.
2255 define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
2256 ; CHECK-LABEL: fma_v4f32_y:
2257 ; CHECK: @ %bb.0: @ %entry
2258 ; CHECK-NEXT: vfma.f32 q0, q1, q2
2259 ; CHECK-NEXT: vctp.32 r0
2261 ; CHECK-NEXT: vmovt q1, q0
2262 ; CHECK-NEXT: vmov q0, q1
2265 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2266 %m = fmul fast <4 x float> %y, %z
2267 %a = fadd fast <4 x float> %m, %x
2268 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
; Predicated multiply-add written as separate fast-math fmul and fadd,
; <8 x half>: lanes enabled by vctp16(%n) get %y * %z + %x, the remaining
; lanes keep %y. Expected codegen is a vfma.f16 into q0 that is then merged
; into q1 by vmovt.
2272 define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
2273 ; CHECK-LABEL: fma_v8f16_y:
2274 ; CHECK: @ %bb.0: @ %entry
2275 ; CHECK-NEXT: vfma.f16 q0, q1, q2
2276 ; CHECK-NEXT: vctp.16 r0
2278 ; CHECK-NEXT: vmovt q1, q0
2279 ; CHECK-NEXT: vmov q0, q1
2282 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2283 %m = fmul fast <8 x half> %y, %z
2284 %a = fadd fast <8 x half> %m, %x
2285 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
; Predicated signed minimum (llvm.smin), <4 x i32>: lanes enabled by
; vctp32(%n) get smin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.s32 into q1, after which q1 is copied to q0.
2289 define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2290 ; CHECK-LABEL: icmp_slt_v4i32_y:
2291 ; CHECK: @ %bb.0: @ %entry
2292 ; CHECK-NEXT: vctp.32 r0
2294 ; CHECK-NEXT: vmint.s32 q1, q0, q1
2295 ; CHECK-NEXT: vmov q0, q1
2298 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2299 %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
2300 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated signed minimum (llvm.smin), <8 x i16>: lanes enabled by
; vctp16(%n) get smin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.s16 into q1, after which q1 is copied to q0.
2304 define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2305 ; CHECK-LABEL: icmp_slt_v8i16_y:
2306 ; CHECK: @ %bb.0: @ %entry
2307 ; CHECK-NEXT: vctp.16 r0
2309 ; CHECK-NEXT: vmint.s16 q1, q0, q1
2310 ; CHECK-NEXT: vmov q0, q1
2313 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2314 %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
2315 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated signed minimum (llvm.smin), <16 x i8>: lanes enabled by
; vctp8(%n) get smin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.s8 into q1, after which q1 is copied to q0.
2319 define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2320 ; CHECK-LABEL: icmp_slt_v16i8_y:
2321 ; CHECK: @ %bb.0: @ %entry
2322 ; CHECK-NEXT: vctp.8 r0
2324 ; CHECK-NEXT: vmint.s8 q1, q0, q1
2325 ; CHECK-NEXT: vmov q0, q1
2328 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2329 %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
2330 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated signed maximum (llvm.smax), <4 x i32>: lanes enabled by
; vctp32(%n) get smax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.s32 into q1, after which q1 is copied to q0.
2334 define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2335 ; CHECK-LABEL: icmp_sgt_v4i32_y:
2336 ; CHECK: @ %bb.0: @ %entry
2337 ; CHECK-NEXT: vctp.32 r0
2339 ; CHECK-NEXT: vmaxt.s32 q1, q0, q1
2340 ; CHECK-NEXT: vmov q0, q1
2343 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2344 %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
2345 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated signed maximum (llvm.smax), <8 x i16>: lanes enabled by
; vctp16(%n) get smax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.s16 into q1, after which q1 is copied to q0.
2349 define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2350 ; CHECK-LABEL: icmp_sgt_v8i16_y:
2351 ; CHECK: @ %bb.0: @ %entry
2352 ; CHECK-NEXT: vctp.16 r0
2354 ; CHECK-NEXT: vmaxt.s16 q1, q0, q1
2355 ; CHECK-NEXT: vmov q0, q1
2358 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2359 %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
2360 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated signed maximum (llvm.smax), <16 x i8>: lanes enabled by
; vctp8(%n) get smax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.s8 into q1, after which q1 is copied to q0.
2364 define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2365 ; CHECK-LABEL: icmp_sgt_v16i8_y:
2366 ; CHECK: @ %bb.0: @ %entry
2367 ; CHECK-NEXT: vctp.8 r0
2369 ; CHECK-NEXT: vmaxt.s8 q1, q0, q1
2370 ; CHECK-NEXT: vmov q0, q1
2373 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2374 %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
2375 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated unsigned minimum (llvm.umin), <4 x i32>: lanes enabled by
; vctp32(%n) get umin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.u32 into q1, after which q1 is copied to q0.
2379 define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2380 ; CHECK-LABEL: icmp_ult_v4i32_y:
2381 ; CHECK: @ %bb.0: @ %entry
2382 ; CHECK-NEXT: vctp.32 r0
2384 ; CHECK-NEXT: vmint.u32 q1, q0, q1
2385 ; CHECK-NEXT: vmov q0, q1
2388 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2389 %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
2390 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated unsigned minimum (llvm.umin), <8 x i16>: lanes enabled by
; vctp16(%n) get umin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.u16 into q1, after which q1 is copied to q0.
2394 define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2395 ; CHECK-LABEL: icmp_ult_v8i16_y:
2396 ; CHECK: @ %bb.0: @ %entry
2397 ; CHECK-NEXT: vctp.16 r0
2399 ; CHECK-NEXT: vmint.u16 q1, q0, q1
2400 ; CHECK-NEXT: vmov q0, q1
2403 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2404 %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
2405 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated unsigned minimum (llvm.umin), <16 x i8>: lanes enabled by
; vctp8(%n) get umin(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmint.u8 into q1, after which q1 is copied to q0.
2409 define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2410 ; CHECK-LABEL: icmp_ult_v16i8_y:
2411 ; CHECK: @ %bb.0: @ %entry
2412 ; CHECK-NEXT: vctp.8 r0
2414 ; CHECK-NEXT: vmint.u8 q1, q0, q1
2415 ; CHECK-NEXT: vmov q0, q1
2418 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2419 %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
2420 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated unsigned maximum (llvm.umax), <4 x i32>: lanes enabled by
; vctp32(%n) get umax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.u32 into q1, after which q1 is copied to q0.
2424 define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2425 ; CHECK-LABEL: icmp_ugt_v4i32_y:
2426 ; CHECK: @ %bb.0: @ %entry
2427 ; CHECK-NEXT: vctp.32 r0
2429 ; CHECK-NEXT: vmaxt.u32 q1, q0, q1
2430 ; CHECK-NEXT: vmov q0, q1
2433 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2434 %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
2435 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated unsigned maximum (llvm.umax), <8 x i16>: lanes enabled by
; vctp16(%n) get umax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.u16 into q1, after which q1 is copied to q0.
2439 define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2440 ; CHECK-LABEL: icmp_ugt_v8i16_y:
2441 ; CHECK: @ %bb.0: @ %entry
2442 ; CHECK-NEXT: vctp.16 r0
2444 ; CHECK-NEXT: vmaxt.u16 q1, q0, q1
2445 ; CHECK-NEXT: vmov q0, q1
2448 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2449 %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
2450 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated unsigned maximum (llvm.umax), <16 x i8>: lanes enabled by
; vctp8(%n) get umax(%x, %y), the remaining lanes keep %y. Expected codegen
; includes a vmaxt.u8 into q1, after which q1 is copied to q0.
2454 define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2455 ; CHECK-LABEL: icmp_ugt_v16i8_y:
2456 ; CHECK: @ %bb.0: @ %entry
2457 ; CHECK-NEXT: vctp.8 r0
2459 ; CHECK-NEXT: vmaxt.u8 q1, q0, q1
2460 ; CHECK-NEXT: vmov q0, q1
2463 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2464 %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
2465 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated fast-math select on olt, <4 x float>: lanes where vctp32(%n) is
; set and %x < %y take %x, all other lanes take %y. Expected codegen is a
; vcmpt.f32 gt with the operands swapped (q1, q0), then a vmovt of q0 into q1.
2469 define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2470 ; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
2471 ; CHECK: @ %bb.0: @ %entry
2472 ; CHECK-NEXT: vctp.32 r0
2474 ; CHECK-NEXT: vcmpt.f32 gt, q1, q0
2475 ; CHECK-NEXT: vmovt q1, q0
2476 ; CHECK-NEXT: vmov q0, q1
2479 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2480 %a1 = fcmp fast olt <4 x float> %x, %y
2481 %0 = and <4 x i1> %c, %a1
2482 %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
; Predicated fast-math select on olt, <8 x half>: lanes where vctp16(%n) is
; set and %x < %y take %x, all other lanes take %y. Expected codegen is a
; vcmpt.f16 gt with the operands swapped (q1, q0), then a vmovt of q0 into q1.
2486 define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2487 ; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
2488 ; CHECK: @ %bb.0: @ %entry
2489 ; CHECK-NEXT: vctp.16 r0
2491 ; CHECK-NEXT: vcmpt.f16 gt, q1, q0
2492 ; CHECK-NEXT: vmovt q1, q0
2493 ; CHECK-NEXT: vmov q0, q1
2496 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2497 %a1 = fcmp fast olt <8 x half> %x, %y
2498 %0 = and <8 x i1> %c, %a1
2499 %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
; Predicated fast-math select on ogt, <4 x float>: lanes where vctp32(%n) is
; set and %x > %y take %x, all other lanes take %y. Expected codegen is a
; vcmpt.f32 gt on (q0, q1), then a vmovt of q0 into q1.
2503 define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2504 ; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
2505 ; CHECK: @ %bb.0: @ %entry
2506 ; CHECK-NEXT: vctp.32 r0
2508 ; CHECK-NEXT: vcmpt.f32 gt, q0, q1
2509 ; CHECK-NEXT: vmovt q1, q0
2510 ; CHECK-NEXT: vmov q0, q1
2513 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2514 %a1 = fcmp fast ogt <4 x float> %x, %y
2515 %0 = and <4 x i1> %c, %a1
2516 %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
; Predicated fast-math select on ogt, <8 x half>: lanes where vctp16(%n) is
; set and %x > %y take %x, all other lanes take %y. Expected codegen is a
; vcmpt.f16 gt on (q0, q1), then a vmovt of q0 into q1.
2520 define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2521 ; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
2522 ; CHECK: @ %bb.0: @ %entry
2523 ; CHECK-NEXT: vctp.16 r0
2525 ; CHECK-NEXT: vcmpt.f16 gt, q0, q1
2526 ; CHECK-NEXT: vmovt q1, q0
2527 ; CHECK-NEXT: vmov q0, q1
2530 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2531 %a1 = fcmp fast ogt <8 x half> %x, %y
2532 %0 = and <8 x i1> %c, %a1
2533 %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
; Predicated signed saturating add (llvm.sadd.sat), <4 x i32>: lanes enabled
; by vctp32(%n) get sadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.s32 into q1, after which q1 is copied to q0.
2537 define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2538 ; CHECK-LABEL: sadd_sat_v4i32_y:
2539 ; CHECK: @ %bb.0: @ %entry
2540 ; CHECK-NEXT: vctp.32 r0
2542 ; CHECK-NEXT: vqaddt.s32 q1, q0, q1
2543 ; CHECK-NEXT: vmov q0, q1
2546 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2547 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2548 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated signed saturating add (llvm.sadd.sat), <8 x i16>: lanes enabled
; by vctp16(%n) get sadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.s16 into q1, after which q1 is copied to q0.
2552 define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2553 ; CHECK-LABEL: sadd_sat_v8i16_y:
2554 ; CHECK: @ %bb.0: @ %entry
2555 ; CHECK-NEXT: vctp.16 r0
2557 ; CHECK-NEXT: vqaddt.s16 q1, q0, q1
2558 ; CHECK-NEXT: vmov q0, q1
2561 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2562 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2563 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated signed saturating add (llvm.sadd.sat), <16 x i8>: lanes enabled
; by vctp8(%n) get sadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.s8 into q1, after which q1 is copied to q0.
2567 define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2568 ; CHECK-LABEL: sadd_sat_v16i8_y:
2569 ; CHECK: @ %bb.0: @ %entry
2570 ; CHECK-NEXT: vctp.8 r0
2572 ; CHECK-NEXT: vqaddt.s8 q1, q0, q1
2573 ; CHECK-NEXT: vmov q0, q1
2576 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2577 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2578 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated unsigned saturating add (llvm.uadd.sat), <4 x i32>: lanes enabled
; by vctp32(%n) get uadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.u32 into q1, after which q1 is copied to q0.
2582 define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2583 ; CHECK-LABEL: uadd_sat_v4i32_y:
2584 ; CHECK: @ %bb.0: @ %entry
2585 ; CHECK-NEXT: vctp.32 r0
2587 ; CHECK-NEXT: vqaddt.u32 q1, q0, q1
2588 ; CHECK-NEXT: vmov q0, q1
2591 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2592 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2593 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated unsigned saturating add (llvm.uadd.sat), <8 x i16>: lanes enabled
; by vctp16(%n) get uadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.u16 into q1, after which q1 is copied to q0.
2597 define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2598 ; CHECK-LABEL: uadd_sat_v8i16_y:
2599 ; CHECK: @ %bb.0: @ %entry
2600 ; CHECK-NEXT: vctp.16 r0
2602 ; CHECK-NEXT: vqaddt.u16 q1, q0, q1
2603 ; CHECK-NEXT: vmov q0, q1
2606 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2607 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2608 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated unsigned saturating add (llvm.uadd.sat), <16 x i8>: lanes enabled
; by vctp8(%n) get uadd.sat(%x, %y), the remaining lanes keep %y. Expected
; codegen includes a vqaddt.u8 into q1, after which q1 is copied to q0.
2612 define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2613 ; CHECK-LABEL: uadd_sat_v16i8_y:
2614 ; CHECK: @ %bb.0: @ %entry
2615 ; CHECK-NEXT: vctp.8 r0
2617 ; CHECK-NEXT: vqaddt.u8 q1, q0, q1
2618 ; CHECK-NEXT: vmov q0, q1
2621 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2622 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2623 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated signed saturating subtract (llvm.ssub.sat), <4 x i32>: lanes
; enabled by vctp32(%n) get ssub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.s32 into q1, after which q1 is copied to q0.
2627 define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2628 ; CHECK-LABEL: ssub_sat_v4i32_y:
2629 ; CHECK: @ %bb.0: @ %entry
2630 ; CHECK-NEXT: vctp.32 r0
2632 ; CHECK-NEXT: vqsubt.s32 q1, q0, q1
2633 ; CHECK-NEXT: vmov q0, q1
2636 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2637 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2638 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated signed saturating subtract (llvm.ssub.sat), <8 x i16>: lanes
; enabled by vctp16(%n) get ssub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.s16 into q1, after which q1 is copied to q0.
2642 define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2643 ; CHECK-LABEL: ssub_sat_v8i16_y:
2644 ; CHECK: @ %bb.0: @ %entry
2645 ; CHECK-NEXT: vctp.16 r0
2647 ; CHECK-NEXT: vqsubt.s16 q1, q0, q1
2648 ; CHECK-NEXT: vmov q0, q1
2651 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2652 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2653 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated signed saturating subtract (llvm.ssub.sat), <16 x i8>: lanes
; enabled by vctp8(%n) get ssub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.s8 into q1, after which q1 is copied to q0.
2657 define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2658 ; CHECK-LABEL: ssub_sat_v16i8_y:
2659 ; CHECK: @ %bb.0: @ %entry
2660 ; CHECK-NEXT: vctp.8 r0
2662 ; CHECK-NEXT: vqsubt.s8 q1, q0, q1
2663 ; CHECK-NEXT: vmov q0, q1
2666 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2667 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2668 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated unsigned saturating subtract (llvm.usub.sat), <4 x i32>: lanes
; enabled by vctp32(%n) get usub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.u32 into q1, after which q1 is copied to q0.
2672 define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2673 ; CHECK-LABEL: usub_sat_v4i32_y:
2674 ; CHECK: @ %bb.0: @ %entry
2675 ; CHECK-NEXT: vctp.32 r0
2677 ; CHECK-NEXT: vqsubt.u32 q1, q0, q1
2678 ; CHECK-NEXT: vmov q0, q1
2681 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2682 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2683 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
; Predicated unsigned saturating subtract (llvm.usub.sat), <8 x i16>: lanes
; enabled by vctp16(%n) get usub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.u16 into q1, after which q1 is copied to q0.
2687 define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2688 ; CHECK-LABEL: usub_sat_v8i16_y:
2689 ; CHECK: @ %bb.0: @ %entry
2690 ; CHECK-NEXT: vctp.16 r0
2692 ; CHECK-NEXT: vqsubt.u16 q1, q0, q1
2693 ; CHECK-NEXT: vmov q0, q1
2696 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2697 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2698 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
; Predicated unsigned saturating subtract (llvm.usub.sat), <16 x i8>: lanes
; enabled by vctp8(%n) get usub.sat(%x, %y), the remaining lanes keep %y.
; Expected codegen includes a vqsubt.u8 into q1, after which q1 is copied to q0.
2702 define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2703 ; CHECK-LABEL: usub_sat_v16i8_y:
2704 ; CHECK: @ %bb.0: @ %entry
2705 ; CHECK-NEXT: vctp.8 r0
2707 ; CHECK-NEXT: vqsubt.u8 q1, q0, q1
2708 ; CHECK-NEXT: vmov q0, q1
2711 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2712 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2713 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
; Predicated add with a splatted scalar, <4 x i32>: %y is broadcast to every
; lane, %x is replaced by 0 (the add identity) in lanes disabled by vctp32(%n),
; and the two are added. Expected codegen splats r0 into q1 with vdup.32 and
; then uses the vector-by-scalar form vaddt.i32 q1, q0, r0.
2717 define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2718 ; CHECK-LABEL: addqr_v4i32_y:
2719 ; CHECK: @ %bb.0: @ %entry
2720 ; CHECK-NEXT: vdup.32 q1, r0
2721 ; CHECK-NEXT: vctp.32 r1
2723 ; CHECK-NEXT: vaddt.i32 q1, q0, r0
2724 ; CHECK-NEXT: vmov q0, q1
2727 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2728 %i = insertelement <4 x i32> undef, i32 %y, i64 0
2729 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2730 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
2731 %b = add <4 x i32> %ys, %a
; Predicated add with a splatted scalar, <8 x i16>: %y is broadcast to every
; lane, %x is replaced by 0 (the add identity) in lanes disabled by vctp16(%n),
; and the two are added. Expected codegen splats r0 into q1 with vdup.16 and
; then uses the vector-by-scalar form vaddt.i16 q1, q0, r0.
2735 define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2736 ; CHECK-LABEL: addqr_v8i16_y:
2737 ; CHECK: @ %bb.0: @ %entry
2738 ; CHECK-NEXT: vdup.16 q1, r0
2739 ; CHECK-NEXT: vctp.16 r1
2741 ; CHECK-NEXT: vaddt.i16 q1, q0, r0
2742 ; CHECK-NEXT: vmov q0, q1
2745 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2746 %i = insertelement <8 x i16> undef, i16 %y, i64 0
2747 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2748 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
2749 %b = add <8 x i16> %ys, %a
; Predicated add with a splatted scalar, <16 x i8>: %y is broadcast to every
; lane, %x is replaced by 0 (the add identity) in lanes disabled by vctp8(%n),
; and the two are added. Expected codegen splats r0 into q1 with vdup.8 and
; then uses the vector-by-scalar form vaddt.i8 q1, q0, r0.
2753 define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2754 ; CHECK-LABEL: addqr_v16i8_y:
2755 ; CHECK: @ %bb.0: @ %entry
2756 ; CHECK-NEXT: vdup.8 q1, r0
2757 ; CHECK-NEXT: vctp.8 r1
2759 ; CHECK-NEXT: vaddt.i8 q1, q0, r0
2760 ; CHECK-NEXT: vmov q0, q1
2763 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2764 %i = insertelement <16 x i8> undef, i8 %y, i64 0
2765 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2766 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
2767 %b = add <16 x i8> %ys, %a
; Predicated subtract of a splatted scalar, <4 x i32>: lanes enabled by
; vctp32(%n) get %x - splat(%y), the remaining lanes keep splat(%y). Expected
; codegen splats r0 into q1 with vdup.32 and then uses vsubt.i32 q1, q0, r0.
2771 define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2772 ; CHECK-LABEL: subqr_v4i32_y:
2773 ; CHECK: @ %bb.0: @ %entry
2774 ; CHECK-NEXT: vdup.32 q1, r0
2775 ; CHECK-NEXT: vctp.32 r1
2777 ; CHECK-NEXT: vsubt.i32 q1, q0, r0
2778 ; CHECK-NEXT: vmov q0, q1
2781 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2782 %i = insertelement <4 x i32> undef, i32 %y, i64 0
2783 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2784 %a = sub <4 x i32> %x, %ys
2785 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; Predicated subtract of a splatted scalar, <8 x i16>: lanes enabled by
; vctp16(%n) get %x - splat(%y), the remaining lanes keep splat(%y). Expected
; codegen splats r0 into q1 with vdup.16 and then uses vsubt.i16 q1, q0, r0.
2789 define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2790 ; CHECK-LABEL: subqr_v8i16_y:
2791 ; CHECK: @ %bb.0: @ %entry
2792 ; CHECK-NEXT: vdup.16 q1, r0
2793 ; CHECK-NEXT: vctp.16 r1
2795 ; CHECK-NEXT: vsubt.i16 q1, q0, r0
2796 ; CHECK-NEXT: vmov q0, q1
2799 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2800 %i = insertelement <8 x i16> undef, i16 %y, i64 0
2801 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2802 %a = sub <8 x i16> %x, %ys
2803 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; Predicated subtract of a splatted scalar, <16 x i8>: lanes enabled by
; vctp8(%n) get %x - splat(%y), the remaining lanes keep splat(%y). Expected
; codegen splats r0 into q1 with vdup.8 and then uses vsubt.i8 q1, q0, r0.
2807 define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2808 ; CHECK-LABEL: subqr_v16i8_y:
2809 ; CHECK: @ %bb.0: @ %entry
2810 ; CHECK-NEXT: vdup.8 q1, r0
2811 ; CHECK-NEXT: vctp.8 r1
2813 ; CHECK-NEXT: vsubt.i8 q1, q0, r0
2814 ; CHECK-NEXT: vmov q0, q1
2817 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2818 %i = insertelement <16 x i8> undef, i8 %y, i64 0
2819 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2820 %a = sub <16 x i8> %x, %ys
2821 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; Predicated multiply by a splatted scalar, <4 x i32>: %y is broadcast to every
; lane, %x is replaced by 1 (the mul identity) in lanes disabled by vctp32(%n),
; and the two are multiplied. Expected codegen splats r0 into q1 with vdup.32
; and then uses the vector-by-scalar form vmult.i32 q1, q0, r0.
2825 define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2826 ; CHECK-LABEL: mulqr_v4i32_y:
2827 ; CHECK: @ %bb.0: @ %entry
2828 ; CHECK-NEXT: vdup.32 q1, r0
2829 ; CHECK-NEXT: vctp.32 r1
2831 ; CHECK-NEXT: vmult.i32 q1, q0, r0
2832 ; CHECK-NEXT: vmov q0, q1
2835 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2836 %i = insertelement <4 x i32> undef, i32 %y, i64 0
2837 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2838 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2839 %b = mul <4 x i32> %ys, %a
; Predicated multiply by a splatted scalar, <8 x i16>: %y is broadcast to every
; lane, %x is replaced by 1 (the mul identity) in lanes disabled by vctp16(%n),
; and the two are multiplied. Expected codegen splats r0 into q1 with vdup.16
; and then uses the vector-by-scalar form vmult.i16 q1, q0, r0.
2843 define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2844 ; CHECK-LABEL: mulqr_v8i16_y:
2845 ; CHECK: @ %bb.0: @ %entry
2846 ; CHECK-NEXT: vdup.16 q1, r0
2847 ; CHECK-NEXT: vctp.16 r1
2849 ; CHECK-NEXT: vmult.i16 q1, q0, r0
2850 ; CHECK-NEXT: vmov q0, q1
2853 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2854 %i = insertelement <8 x i16> undef, i16 %y, i64 0
2855 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2856 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2857 %b = mul <8 x i16> %ys, %a
; Predicated multiply by a splatted scalar, <16 x i8>: %y is broadcast to every
; lane, %x is replaced by 1 (the mul identity) in lanes disabled by vctp8(%n),
; and the two are multiplied. Expected codegen splats r0 into q1 with vdup.8
; and then uses the vector-by-scalar form vmult.i8 q1, q0, r0.
2861 define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2862 ; CHECK-LABEL: mulqr_v16i8_y:
2863 ; CHECK: @ %bb.0: @ %entry
2864 ; CHECK-NEXT: vdup.8 q1, r0
2865 ; CHECK-NEXT: vctp.8 r1
2867 ; CHECK-NEXT: vmult.i8 q1, q0, r0
2868 ; CHECK-NEXT: vmov q0, q1
2871 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2872 %i = insertelement <16 x i8> undef, i8 %y, i64 0
2873 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2874 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2875 %b = mul <16 x i8> %ys, %a
; Predicated fadd with a splatted scalar, <4 x float>: %y is broadcast to every
; lane, %x is replaced by -0.0 (the fadd identity) in lanes disabled by
; vctp32(%n), and the two are added. Expected codegen moves s4 to r1, splats it
; into q1 with vdup.32 and then uses vaddt.f32 q1, q0, r1.
2879 define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2880 ; CHECK-LABEL: faddqr_v4f32_y:
2881 ; CHECK: @ %bb.0: @ %entry
2882 ; CHECK-NEXT: vmov r1, s4
2883 ; CHECK-NEXT: vctp.32 r0
2884 ; CHECK-NEXT: vdup.32 q1, r1
2886 ; CHECK-NEXT: vaddt.f32 q1, q0, r1
2887 ; CHECK-NEXT: vmov q0, q1
2890 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2891 %i = insertelement <4 x float> undef, float %y, i64 0
2892 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2893 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
2894 %b = fadd <4 x float> %ys, %a
; Predicated fadd with a splatted scalar, <8 x half>: %y is broadcast to every
; lane, %x is replaced by 0xH8000 (half -0.0, the fadd identity) in lanes
; disabled by vctp16(%n), and the two are added. Expected codegen moves s4 to
; r1 with vmov.f16, splats it into q1 with vdup.16 and then uses
; vaddt.f16 q1, q0, r1.
2898 define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2899 ; CHECK-LABEL: faddqr_v8f16_y:
2900 ; CHECK: @ %bb.0: @ %entry
2901 ; CHECK-NEXT: vmov.f16 r1, s4
2902 ; CHECK-NEXT: vctp.16 r0
2903 ; CHECK-NEXT: vdup.16 q1, r1
2905 ; CHECK-NEXT: vaddt.f16 q1, q0, r1
2906 ; CHECK-NEXT: vmov q0, q1
2909 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2910 %i = insertelement <8 x half> undef, half %y, i64 0
2911 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2912 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
2913 %b = fadd <8 x half> %ys, %a
; Predicated fsub of a splatted scalar, <4 x float>: lanes enabled by
; vctp32(%n) get %x - splat(%y), the remaining lanes keep splat(%y). Expected
; codegen moves s4 to r1, splats it into q1 with vdup.32 and then uses
; vsubt.f32 q1, q0, r1.
2917 define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2918 ; CHECK-LABEL: fsubqr_v4f32_y:
2919 ; CHECK: @ %bb.0: @ %entry
2920 ; CHECK-NEXT: vmov r1, s4
2921 ; CHECK-NEXT: vctp.32 r0
2922 ; CHECK-NEXT: vdup.32 q1, r1
2924 ; CHECK-NEXT: vsubt.f32 q1, q0, r1
2925 ; CHECK-NEXT: vmov q0, q1
2928 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2929 %i = insertelement <4 x float> undef, float %y, i64 0
2930 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2931 %a = fsub <4 x float> %x, %ys
2932 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
; Predicated fsub of a splatted scalar, <8 x half>: lanes enabled by
; vctp16(%n) get %x - splat(%y), the remaining lanes keep splat(%y). Expected
; codegen moves s4 to r1 with vmov.f16, splats it into q1 with vdup.16 and then
; uses vsubt.f16 q1, q0, r1.
2936 define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2937 ; CHECK-LABEL: fsubqr_v8f16_y:
2938 ; CHECK: @ %bb.0: @ %entry
2939 ; CHECK-NEXT: vmov.f16 r1, s4
2940 ; CHECK-NEXT: vctp.16 r0
2941 ; CHECK-NEXT: vdup.16 q1, r1
2943 ; CHECK-NEXT: vsubt.f16 q1, q0, r1
2944 ; CHECK-NEXT: vmov q0, q1
2947 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2948 %i = insertelement <8 x half> undef, half %y, i64 0
2949 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2950 %a = fsub <8 x half> %x, %ys
2951 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
; Predicated fmul by a splatted scalar, <4 x float>: %y is broadcast to every
; lane, %x is replaced by 1.0 (the fmul identity) in lanes disabled by
; vctp32(%n), and the two are multiplied. Expected codegen moves s4 to r1,
; splats it into q1 with vdup.32 and then uses vmult.f32 q1, q0, r1.
2955 define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2956 ; CHECK-LABEL: fmulqr_v4f32_y:
2957 ; CHECK: @ %bb.0: @ %entry
2958 ; CHECK-NEXT: vmov r1, s4
2959 ; CHECK-NEXT: vctp.32 r0
2960 ; CHECK-NEXT: vdup.32 q1, r1
2962 ; CHECK-NEXT: vmult.f32 q1, q0, r1
2963 ; CHECK-NEXT: vmov q0, q1
2966 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2967 %i = insertelement <4 x float> undef, float %y, i64 0
2968 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2969 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
2970 %b = fmul <4 x float> %ys, %a
; Predicated fmul by a splatted scalar, <8 x half>: %y is broadcast to every
; lane, %x is replaced by 0xH3C00 (half 1.0, the fmul identity) in lanes
; disabled by vctp16(%n), and the two are multiplied. Expected codegen moves s4
; to r1 with vmov.f16, splats it into q1 with vdup.16 and then uses
; vmult.f16 q1, q0, r1.
2974 define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2975 ; CHECK-LABEL: fmulqr_v8f16_y:
2976 ; CHECK: @ %bb.0: @ %entry
2977 ; CHECK-NEXT: vmov.f16 r1, s4
2978 ; CHECK-NEXT: vctp.16 r0
2979 ; CHECK-NEXT: vdup.16 q1, r1
2981 ; CHECK-NEXT: vmult.f16 q1, q0, r1
2982 ; CHECK-NEXT: vmov q0, q1
2985 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2986 %i = insertelement <8 x half> undef, half %y, i64 0
2987 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2988 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
2989 %b = fmul <8 x half> %ys, %a
; Predicated signed saturating add of a splatted scalar, <4 x i32>: lanes
; enabled by vctp32(%n) get sadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.32 and then uses
; vqaddt.s32 q1, q0, r0.
2993 define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2994 ; CHECK-LABEL: sadd_satqr_v4i32_y:
2995 ; CHECK: @ %bb.0: @ %entry
2996 ; CHECK-NEXT: vdup.32 q1, r0
2997 ; CHECK-NEXT: vctp.32 r1
2999 ; CHECK-NEXT: vqaddt.s32 q1, q0, r0
3000 ; CHECK-NEXT: vmov q0, q1
3003 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
3004 %i = insertelement <4 x i32> undef, i32 %y, i64 0
3005 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3006 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3007 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; Predicated signed saturating add of a splatted scalar, <8 x i16>: lanes
; enabled by vctp16(%n) get sadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.16 and then uses
; vqaddt.s16 q1, q0, r0.
3011 define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3012 ; CHECK-LABEL: sadd_satqr_v8i16_y:
3013 ; CHECK: @ %bb.0: @ %entry
3014 ; CHECK-NEXT: vdup.16 q1, r0
3015 ; CHECK-NEXT: vctp.16 r1
3017 ; CHECK-NEXT: vqaddt.s16 q1, q0, r0
3018 ; CHECK-NEXT: vmov q0, q1
3021 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
3022 %i = insertelement <8 x i16> undef, i16 %y, i64 0
3023 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3024 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3025 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; Predicated signed saturating add of a splatted scalar, <16 x i8>: lanes
; enabled by vctp8(%n) get sadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.8 and then uses
; vqaddt.s8 q1, q0, r0.
3029 define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3030 ; CHECK-LABEL: sadd_satqr_v16i8_y:
3031 ; CHECK: @ %bb.0: @ %entry
3032 ; CHECK-NEXT: vdup.8 q1, r0
3033 ; CHECK-NEXT: vctp.8 r1
3035 ; CHECK-NEXT: vqaddt.s8 q1, q0, r0
3036 ; CHECK-NEXT: vmov q0, q1
3039 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
3040 %i = insertelement <16 x i8> undef, i8 %y, i64 0
3041 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3042 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3043 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; Predicated unsigned saturating add of a splatted scalar, <4 x i32>: lanes
; enabled by vctp32(%n) get uadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.32 and then uses
; vqaddt.u32 q1, q0, r0.
3047 define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3048 ; CHECK-LABEL: uadd_satqr_v4i32_y:
3049 ; CHECK: @ %bb.0: @ %entry
3050 ; CHECK-NEXT: vdup.32 q1, r0
3051 ; CHECK-NEXT: vctp.32 r1
3053 ; CHECK-NEXT: vqaddt.u32 q1, q0, r0
3054 ; CHECK-NEXT: vmov q0, q1
3057 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
3058 %i = insertelement <4 x i32> undef, i32 %y, i64 0
3059 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3060 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3061 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; Predicated unsigned saturating add of a splatted scalar, <8 x i16>: lanes
; enabled by vctp16(%n) get uadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.16 and then uses
; vqaddt.u16 q1, q0, r0.
3065 define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3066 ; CHECK-LABEL: uadd_satqr_v8i16_y:
3067 ; CHECK: @ %bb.0: @ %entry
3068 ; CHECK-NEXT: vdup.16 q1, r0
3069 ; CHECK-NEXT: vctp.16 r1
3071 ; CHECK-NEXT: vqaddt.u16 q1, q0, r0
3072 ; CHECK-NEXT: vmov q0, q1
3075 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
3076 %i = insertelement <8 x i16> undef, i16 %y, i64 0
3077 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3078 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3079 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; Predicated unsigned saturating add of a splatted scalar, <16 x i8>: lanes
; enabled by vctp8(%n) get uadd.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.8 and then uses
; vqaddt.u8 q1, q0, r0.
3083 define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3084 ; CHECK-LABEL: uadd_satqr_v16i8_y:
3085 ; CHECK: @ %bb.0: @ %entry
3086 ; CHECK-NEXT: vdup.8 q1, r0
3087 ; CHECK-NEXT: vctp.8 r1
3089 ; CHECK-NEXT: vqaddt.u8 q1, q0, r0
3090 ; CHECK-NEXT: vmov q0, q1
3093 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
3094 %i = insertelement <16 x i8> undef, i8 %y, i64 0
3095 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3096 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3097 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; Predicated signed saturating subtract of a splatted scalar, <4 x i32>: lanes
; enabled by vctp32(%n) get ssub.sat(%x, splat(%y)), the remaining lanes keep
; splat(%y). Expected codegen splats r0 into q1 with vdup.32 and then uses
; vqsubt.s32 q1, q0, r0.
3101 define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3102 ; CHECK-LABEL: ssub_satqr_v4i32_y:
3103 ; CHECK: @ %bb.0: @ %entry
3104 ; CHECK-NEXT: vdup.32 q1, r0
3105 ; CHECK-NEXT: vctp.32 r1
3107 ; CHECK-NEXT: vqsubt.s32 q1, q0, r0
3108 ; CHECK-NEXT: vmov q0, q1
3111 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
3112 %i = insertelement <4 x i32> undef, i32 %y, i64 0
3113 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3114 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3115 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; ssub_satqr_v8i16_y: signed saturating subtract (llvm.ssub.sat.v8i16) of %ys
; from %x, where %ys is the i16 scalar %y splatted with insertelement +
; shufflevector. The select on %c (result of llvm.arm.mve.vctp16 on %n) keeps
; the saturating subtract result where %c is true and the splat %ys elsewhere.
; Expected assembly: vdup.16 of r0 into q1, vctp.16 on r1, a vqsubt.s16 that
; writes q1 from q0 and the scalar r0, then a vmov of q1 into q0.
3119 define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3120 ; CHECK-LABEL: ssub_satqr_v8i16_y:
3121 ; CHECK: @ %bb.0: @ %entry
3122 ; CHECK-NEXT: vdup.16 q1, r0
3123 ; CHECK-NEXT: vctp.16 r1
3125 ; CHECK-NEXT: vqsubt.s16 q1, q0, r0
3126 ; CHECK-NEXT: vmov q0, q1
3129 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
3130 %i = insertelement <8 x i16> undef, i16 %y, i64 0
3131 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3132 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3133 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; ssub_satqr_v16i8_y: signed saturating subtract (llvm.ssub.sat.v16i8) of %ys
; from %x, where %ys is the i8 scalar %y splatted with insertelement +
; shufflevector. The select on %c (result of llvm.arm.mve.vctp8 on %n) keeps
; the saturating subtract result where %c is true and the splat %ys elsewhere.
; Expected assembly: vdup.8 of r0 into q1, vctp.8 on r1, a vqsubt.s8 that
; writes q1 from q0 and the scalar r0, then a vmov of q1 into q0.
3137 define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3138 ; CHECK-LABEL: ssub_satqr_v16i8_y:
3139 ; CHECK: @ %bb.0: @ %entry
3140 ; CHECK-NEXT: vdup.8 q1, r0
3141 ; CHECK-NEXT: vctp.8 r1
3143 ; CHECK-NEXT: vqsubt.s8 q1, q0, r0
3144 ; CHECK-NEXT: vmov q0, q1
3147 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
3148 %i = insertelement <16 x i8> undef, i8 %y, i64 0
3149 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3150 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3151 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; usub_satqr_v4i32_y: unsigned saturating subtract (llvm.usub.sat.v4i32) of
; %ys from %x, where %ys is the scalar %y splatted with insertelement +
; shufflevector. The select on %c (result of llvm.arm.mve.vctp32 on %n) keeps
; the saturating subtract result where %c is true and the splat %ys elsewhere.
; Expected assembly: vdup.32 of r0 into q1, vctp.32 on r1, a vqsubt.u32 that
; writes q1 from q0 and the scalar r0, then a vmov of q1 into q0.
3155 define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3156 ; CHECK-LABEL: usub_satqr_v4i32_y:
3157 ; CHECK: @ %bb.0: @ %entry
3158 ; CHECK-NEXT: vdup.32 q1, r0
3159 ; CHECK-NEXT: vctp.32 r1
3161 ; CHECK-NEXT: vqsubt.u32 q1, q0, r0
3162 ; CHECK-NEXT: vmov q0, q1
3165 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
3166 %i = insertelement <4 x i32> undef, i32 %y, i64 0
3167 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3168 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3169 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; usub_satqr_v8i16_y: unsigned saturating subtract (llvm.usub.sat.v8i16) of
; %ys from %x, where %ys is the i16 scalar %y splatted with insertelement +
; shufflevector. The select on %c (result of llvm.arm.mve.vctp16 on %n) keeps
; the saturating subtract result where %c is true and the splat %ys elsewhere.
; Expected assembly: vdup.16 of r0 into q1, vctp.16 on r1, a vqsubt.u16 that
; writes q1 from q0 and the scalar r0, then a vmov of q1 into q0.
3173 define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3174 ; CHECK-LABEL: usub_satqr_v8i16_y:
3175 ; CHECK: @ %bb.0: @ %entry
3176 ; CHECK-NEXT: vdup.16 q1, r0
3177 ; CHECK-NEXT: vctp.16 r1
3179 ; CHECK-NEXT: vqsubt.u16 q1, q0, r0
3180 ; CHECK-NEXT: vmov q0, q1
3183 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
3184 %i = insertelement <8 x i16> undef, i16 %y, i64 0
3185 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3186 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3187 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; usub_satqr_v16i8_y: unsigned saturating subtract (llvm.usub.sat.v16i8) of
; %ys from %x, where %ys is the i8 scalar %y splatted with insertelement +
; shufflevector. The select on %c (result of llvm.arm.mve.vctp8 on %n) keeps
; the saturating subtract result where %c is true and the splat %ys elsewhere.
; Expected assembly: vdup.8 of r0 into q1, vctp.8 on r1, a vqsubt.u8 that
; writes q1 from q0 and the scalar r0, then a vmov of q1 into q0.
3191 define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3192 ; CHECK-LABEL: usub_satqr_v16i8_y:
3193 ; CHECK: @ %bb.0: @ %entry
3194 ; CHECK-NEXT: vdup.8 q1, r0
3195 ; CHECK-NEXT: vctp.8 r1
3197 ; CHECK-NEXT: vqsubt.u8 q1, q0, r0
3198 ; CHECK-NEXT: vmov q0, q1
3201 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
3202 %i = insertelement <16 x i8> undef, i8 %y, i64 0
3203 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3204 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3205 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; Declarations of the saturating add/subtract intrinsics (sadd, uadd, ssub,
; usub), each for the <16 x i8>, <8 x i16> and <4 x i32> vector types.
3209 declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
3210 declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
3211 declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
3212 declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
3213 declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
3214 declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
3215 declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
3216 declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
3217 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
3218 declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
3219 declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
3220 declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
; Declarations of the integer umin/umax/smin/smax intrinsics for the same
; three vector types.
3222 declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
3223 declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
3224 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
3225 declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
3226 declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
3227 declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
3228 declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
3229 declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
3230 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
3231 declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
3232 declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
3233 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
; Declarations of llvm.fma for <4 x float> and <8 x half>; each takes three
; vector operands of the result type.
3235 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
3236 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
; Declarations of the llvm.arm.mve.vctp8/16/32 intrinsics; each takes an i32
; and returns an i1 vector of 16, 8 or 4 lanes respectively. The tests above
; use these results as the select condition.
3238 declare <16 x i1> @llvm.arm.mve.vctp8(i32)
3239 declare <8 x i1> @llvm.arm.mve.vctp16(i32)
3240 declare <4 x i1> @llvm.arm.mve.vctp32(i32)