1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; test_vminvq_s8: zero-extends the i8 scalar %a to i32, passes it with the
; <16 x i8> vector %b to llvm.arm.mve.minv.v16i8 (final operand i32 0), and
; truncates the i32 result to i8. Expected asm: vminv.s8 on r0/q0, then sxtb.
; The i32 0 operand pairs with the signed mnemonic here; presumably it is an
; is-unsigned flag -- confirm against the intrinsic definition.
4 define arm_aapcs_vfpcc signext i8 @test_vminvq_s8(i8 signext %a, <16 x i8> %b) {
5 ; CHECK-LABEL: test_vminvq_s8:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vminv.s8 r0, q0
8 ; CHECK-NEXT: sxtb r0, r0
11 %0 = zext i8 %a to i32
12 %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 0)
13 %2 = trunc i32 %1 to i8
; test_vminvq_s16: zero-extends the i16 scalar %a to i32, calls
; llvm.arm.mve.minv.v8i16 on it and the <8 x i16> vector %b with a final
; i32 0 operand, and truncates the result to i16.
; Expected asm: vminv.s16 on r0/q0, then sxth.
17 define arm_aapcs_vfpcc signext i16 @test_vminvq_s16(i16 signext %a, <8 x i16> %b) {
18 ; CHECK-LABEL: test_vminvq_s16:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vminv.s16 r0, q0
21 ; CHECK-NEXT: sxth r0, r0
24 %0 = zext i16 %a to i32
25 %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 0)
26 %2 = trunc i32 %1 to i16
; test_vminvq_s32: passes the i32 scalar %a and <4 x i32> vector %b straight
; to llvm.arm.mve.minv.v4i32 with a final i32 0 operand; no extend/truncate
; is needed. Expected asm: a single vminv.s32 on r0/q0.
30 define arm_aapcs_vfpcc i32 @test_vminvq_s32(i32 %a, <4 x i32> %b) {
31 ; CHECK-LABEL: test_vminvq_s32:
32 ; CHECK: @ %bb.0: @ %entry
33 ; CHECK-NEXT: vminv.s32 r0, q0
36 %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 0)
; test_vminvq_u8: same shape as the signed i8 test, but the final intrinsic
; operand is i32 1 and the parameter/return are zeroext.
; Expected asm: vminv.u8 on r0/q0, then uxtb.
; The i32 1 operand pairs with the unsigned mnemonic; presumably an
; is-unsigned flag -- confirm against the intrinsic definition.
40 define arm_aapcs_vfpcc zeroext i8 @test_vminvq_u8(i8 zeroext %a, <16 x i8> %b) {
41 ; CHECK-LABEL: test_vminvq_u8:
42 ; CHECK: @ %bb.0: @ %entry
43 ; CHECK-NEXT: vminv.u8 r0, q0
44 ; CHECK-NEXT: uxtb r0, r0
47 %0 = zext i8 %a to i32
48 %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 1)
49 %2 = trunc i32 %1 to i8
; test_vminvq_u16: zero-extends the i16 scalar %a to i32, calls
; llvm.arm.mve.minv.v8i16 with a final i32 1 operand, and truncates the
; result to i16. Expected asm: vminv.u16 on r0/q0, then uxth.
53 define arm_aapcs_vfpcc zeroext i16 @test_vminvq_u16(i16 zeroext %a, <8 x i16> %b) {
54 ; CHECK-LABEL: test_vminvq_u16:
55 ; CHECK: @ %bb.0: @ %entry
56 ; CHECK-NEXT: vminv.u16 r0, q0
57 ; CHECK-NEXT: uxth r0, r0
60 %0 = zext i16 %a to i32
61 %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 1)
62 %2 = trunc i32 %1 to i16
; test_vminvq_u32: calls llvm.arm.mve.minv.v4i32 directly on the i32 scalar
; %a and <4 x i32> vector %b with a final i32 1 operand.
; Expected asm: a single vminv.u32 on r0/q0.
66 define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) {
67 ; CHECK-LABEL: test_vminvq_u32:
68 ; CHECK: @ %bb.0: @ %entry
69 ; CHECK-NEXT: vminv.u32 r0, q0
72 %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 1)
; test_vmaxvq_s8: zero-extends the i8 scalar %a to i32, calls
; llvm.arm.mve.maxv.v16i8 on it and the <16 x i8> vector %b with a final
; i32 0 operand, and truncates the result to i8.
; Expected asm: vmaxv.s8 on r0/q0, then sxtb.
76 define arm_aapcs_vfpcc signext i8 @test_vmaxvq_s8(i8 signext %a, <16 x i8> %b) {
77 ; CHECK-LABEL: test_vmaxvq_s8:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmaxv.s8 r0, q0
80 ; CHECK-NEXT: sxtb r0, r0
83 %0 = zext i8 %a to i32
84 %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 0)
85 %2 = trunc i32 %1 to i8
; test_vmaxvq_s16: zero-extends the i16 scalar %a to i32, calls
; llvm.arm.mve.maxv.v8i16 with a final i32 0 operand, and truncates the
; result to i16. Expected asm: vmaxv.s16 on r0/q0, then sxth.
89 define arm_aapcs_vfpcc signext i16 @test_vmaxvq_s16(i16 signext %a, <8 x i16> %b) {
90 ; CHECK-LABEL: test_vmaxvq_s16:
91 ; CHECK: @ %bb.0: @ %entry
92 ; CHECK-NEXT: vmaxv.s16 r0, q0
93 ; CHECK-NEXT: sxth r0, r0
96 %0 = zext i16 %a to i32
97 %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 0)
98 %2 = trunc i32 %1 to i16
; test_vmaxvq_s32: calls llvm.arm.mve.maxv.v4i32 directly on the i32 scalar
; %a and <4 x i32> vector %b with a final i32 0 operand.
; Expected asm: a single vmaxv.s32 on r0/q0.
102 define arm_aapcs_vfpcc i32 @test_vmaxvq_s32(i32 %a, <4 x i32> %b) {
103 ; CHECK-LABEL: test_vmaxvq_s32:
104 ; CHECK: @ %bb.0: @ %entry
105 ; CHECK-NEXT: vmaxv.s32 r0, q0
108 %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 0)
; test_vmaxvq_u8: zero-extends the i8 scalar %a to i32, calls
; llvm.arm.mve.maxv.v16i8 with a final i32 1 operand, and truncates the
; result to i8. Expected asm: vmaxv.u8 on r0/q0, then uxtb.
112 define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_u8(i8 zeroext %a, <16 x i8> %b) {
113 ; CHECK-LABEL: test_vmaxvq_u8:
114 ; CHECK: @ %bb.0: @ %entry
115 ; CHECK-NEXT: vmaxv.u8 r0, q0
116 ; CHECK-NEXT: uxtb r0, r0
119 %0 = zext i8 %a to i32
120 %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 1)
121 %2 = trunc i32 %1 to i8
; test_vmaxvq_u16: zero-extends the i16 scalar %a to i32, calls
; llvm.arm.mve.maxv.v8i16 with a final i32 1 operand, and truncates the
; result to i16. Expected asm: vmaxv.u16 on r0/q0, then uxth.
125 define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_u16(i16 zeroext %a, <8 x i16> %b) {
126 ; CHECK-LABEL: test_vmaxvq_u16:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: vmaxv.u16 r0, q0
129 ; CHECK-NEXT: uxth r0, r0
132 %0 = zext i16 %a to i32
133 %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 1)
134 %2 = trunc i32 %1 to i16
; test_vmaxvq_u32: calls llvm.arm.mve.maxv.v4i32 directly on the i32 scalar
; %a and <4 x i32> vector %b with a final i32 1 operand.
; Expected asm: a single vmaxv.u32 on r0/q0.
138 define arm_aapcs_vfpcc i32 @test_vmaxvq_u32(i32 %a, <4 x i32> %b) {
139 ; CHECK-LABEL: test_vmaxvq_u32:
140 ; CHECK: @ %bb.0: @ %entry
141 ; CHECK-NEXT: vmaxv.u32 r0, q0
144 %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 1)
; test_vminavq_s8: zero-extends the i8 scalar %a to i32, calls the
; two-operand llvm.arm.mve.minav.v16i8 (no trailing flag operand) with the
; <16 x i8> vector %b, and truncates the result to i8. The scalar and the
; return are zeroext. Expected asm: vminav.s8 on r0/q0, then uxtb.
148 define arm_aapcs_vfpcc zeroext i8 @test_vminavq_s8(i8 zeroext %a, <16 x i8> %b) {
149 ; CHECK-LABEL: test_vminavq_s8:
150 ; CHECK: @ %bb.0: @ %entry
151 ; CHECK-NEXT: vminav.s8 r0, q0
152 ; CHECK-NEXT: uxtb r0, r0
155 %0 = zext i8 %a to i32
156 %1 = tail call i32 @llvm.arm.mve.minav.v16i8(i32 %0, <16 x i8> %b)
157 %2 = trunc i32 %1 to i8
; test_vminavq_s16: zero-extends the i16 scalar %a to i32, calls the
; two-operand llvm.arm.mve.minav.v8i16 with the <8 x i16> vector %b, and
; truncates the result to i16. Expected asm: vminav.s16 on r0/q0, then uxth.
161 define arm_aapcs_vfpcc zeroext i16 @test_vminavq_s16(i16 zeroext %a, <8 x i16> %b) {
162 ; CHECK-LABEL: test_vminavq_s16:
163 ; CHECK: @ %bb.0: @ %entry
164 ; CHECK-NEXT: vminav.s16 r0, q0
165 ; CHECK-NEXT: uxth r0, r0
168 %0 = zext i16 %a to i32
169 %1 = tail call i32 @llvm.arm.mve.minav.v8i16(i32 %0, <8 x i16> %b)
170 %2 = trunc i32 %1 to i16
; test_vminavq_s32: calls llvm.arm.mve.minav.v4i32 directly on the i32
; scalar %a and <4 x i32> vector %b.
; Expected asm: a single vminav.s32 on r0/q0.
174 define arm_aapcs_vfpcc i32 @test_vminavq_s32(i32 %a, <4 x i32> %b) {
175 ; CHECK-LABEL: test_vminavq_s32:
176 ; CHECK: @ %bb.0: @ %entry
177 ; CHECK-NEXT: vminav.s32 r0, q0
180 %0 = tail call i32 @llvm.arm.mve.minav.v4i32(i32 %a, <4 x i32> %b)
; test_vmaxavq_s8: zero-extends the i8 scalar %a to i32, calls the
; two-operand llvm.arm.mve.maxav.v16i8 with the <16 x i8> vector %b, and
; truncates the result to i8. Expected asm: vmaxav.s8 on r0/q0, then uxtb.
184 define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_s8(i8 zeroext %a, <16 x i8> %b) {
185 ; CHECK-LABEL: test_vmaxavq_s8:
186 ; CHECK: @ %bb.0: @ %entry
187 ; CHECK-NEXT: vmaxav.s8 r0, q0
188 ; CHECK-NEXT: uxtb r0, r0
191 %0 = zext i8 %a to i32
192 %1 = tail call i32 @llvm.arm.mve.maxav.v16i8(i32 %0, <16 x i8> %b)
193 %2 = trunc i32 %1 to i8
; test_vmaxavq_s16: zero-extends the i16 scalar %a to i32, calls the
; two-operand llvm.arm.mve.maxav.v8i16 with the <8 x i16> vector %b, and
; truncates the result to i16. Expected asm: vmaxav.s16 on r0/q0, then uxth.
197 define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_s16(i16 zeroext %a, <8 x i16> %b) {
198 ; CHECK-LABEL: test_vmaxavq_s16:
199 ; CHECK: @ %bb.0: @ %entry
200 ; CHECK-NEXT: vmaxav.s16 r0, q0
201 ; CHECK-NEXT: uxth r0, r0
204 %0 = zext i16 %a to i32
205 %1 = tail call i32 @llvm.arm.mve.maxav.v8i16(i32 %0, <8 x i16> %b)
206 %2 = trunc i32 %1 to i16
; test_vmaxavq_s32: calls llvm.arm.mve.maxav.v4i32 directly on the i32
; scalar %a and <4 x i32> vector %b.
; Expected asm: a single vmaxav.s32 on r0/q0.
210 define arm_aapcs_vfpcc i32 @test_vmaxavq_s32(i32 %a, <4 x i32> %b) {
211 ; CHECK-LABEL: test_vmaxavq_s32:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vmaxav.s32 r0, q0
216 %0 = tail call i32 @llvm.arm.mve.maxav.v4i32(i32 %a, <4 x i32> %b)
; test_vminnmvq_f16: the half scalar travels inside a float (%a.coerce).
; The IR unpacks it (bitcast float->i32, trunc to i16, bitcast to half),
; calls llvm.arm.mve.minnmv.f16.v8f16 with the <8 x half> vector %b, then
; repacks the half result (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: vmov s0 into r0, vminnmv.f16 on r0/q1, then three vmov
; instructions moving the value back through s0.
220 define arm_aapcs_vfpcc float @test_vminnmvq_f16(float %a.coerce, <8 x half> %b) {
221 ; CHECK-LABEL: test_vminnmvq_f16:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vmov r0, s0
224 ; CHECK-NEXT: vminnmv.f16 r0, q1
225 ; CHECK-NEXT: vmov s0, r0
226 ; CHECK-NEXT: vmov.f16 r0, s0
227 ; CHECK-NEXT: vmov s0, r0
230 %0 = bitcast float %a.coerce to i32
231 %tmp.0.extract.trunc = trunc i32 %0 to i16
232 %1 = bitcast i16 %tmp.0.extract.trunc to half
233 %2 = tail call half @llvm.arm.mve.minnmv.f16.v8f16(half %1, <8 x half> %b)
234 %3 = bitcast half %2 to i16
235 %tmp2.0.insert.ext = zext i16 %3 to i32
236 %4 = bitcast i32 %tmp2.0.insert.ext to float
; test_vminnmvq_f32: calls llvm.arm.mve.minnmv.f32.v4f32 directly on the
; float scalar %a and <4 x float> vector %b.
; Expected asm: vmov s0 into r0, vminnmv.f32 on r0/q1, vmov r0 back to s0.
240 define arm_aapcs_vfpcc float @test_vminnmvq_f32(float %a, <4 x float> %b) {
241 ; CHECK-LABEL: test_vminnmvq_f32:
242 ; CHECK: @ %bb.0: @ %entry
243 ; CHECK-NEXT: vmov r0, s0
244 ; CHECK-NEXT: vminnmv.f32 r0, q1
245 ; CHECK-NEXT: vmov s0, r0
248 %0 = tail call float @llvm.arm.mve.minnmv.f32.v4f32(float %a, <4 x float> %b)
; test_vminnmavq_f16: the half scalar travels inside a float (%a.coerce).
; The IR unpacks it (bitcast float->i32, trunc to i16, bitcast to half),
; calls llvm.arm.mve.minnmav.f16.v8f16 with the <8 x half> vector %b, then
; repacks the half result (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: vmov s0 into r0, vminnmav.f16 on r0/q1, then three vmov
; instructions moving the value back through s0.
252 define arm_aapcs_vfpcc float @test_vminnmavq_f16(float %a.coerce, <8 x half> %b) {
253 ; CHECK-LABEL: test_vminnmavq_f16:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: vmov r0, s0
256 ; CHECK-NEXT: vminnmav.f16 r0, q1
257 ; CHECK-NEXT: vmov s0, r0
258 ; CHECK-NEXT: vmov.f16 r0, s0
259 ; CHECK-NEXT: vmov s0, r0
262 %0 = bitcast float %a.coerce to i32
263 %tmp.0.extract.trunc = trunc i32 %0 to i16
264 %1 = bitcast i16 %tmp.0.extract.trunc to half
265 %2 = tail call half @llvm.arm.mve.minnmav.f16.v8f16(half %1, <8 x half> %b)
266 %3 = bitcast half %2 to i16
267 %tmp2.0.insert.ext = zext i16 %3 to i32
268 %4 = bitcast i32 %tmp2.0.insert.ext to float
; test_vminnmavq_f32: calls llvm.arm.mve.minnmav.f32.v4f32 directly on the
; float scalar %a and <4 x float> vector %b.
; Expected asm: vmov s0 into r0, vminnmav.f32 on r0/q1, vmov r0 back to s0.
272 define arm_aapcs_vfpcc float @test_vminnmavq_f32(float %a, <4 x float> %b) {
273 ; CHECK-LABEL: test_vminnmavq_f32:
274 ; CHECK: @ %bb.0: @ %entry
275 ; CHECK-NEXT: vmov r0, s0
276 ; CHECK-NEXT: vminnmav.f32 r0, q1
277 ; CHECK-NEXT: vmov s0, r0
280 %0 = tail call float @llvm.arm.mve.minnmav.f32.v4f32(float %a, <4 x float> %b)
; test_vmaxnmvq_f16: the half scalar travels inside a float (%a.coerce).
; The IR unpacks it (bitcast float->i32, trunc to i16, bitcast to half),
; calls llvm.arm.mve.maxnmv.f16.v8f16 with the <8 x half> vector %b, then
; repacks the half result (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: vmov s0 into r0, vmaxnmv.f16 on r0/q1, then three vmov
; instructions moving the value back through s0.
284 define arm_aapcs_vfpcc float @test_vmaxnmvq_f16(float %a.coerce, <8 x half> %b) {
285 ; CHECK-LABEL: test_vmaxnmvq_f16:
286 ; CHECK: @ %bb.0: @ %entry
287 ; CHECK-NEXT: vmov r0, s0
288 ; CHECK-NEXT: vmaxnmv.f16 r0, q1
289 ; CHECK-NEXT: vmov s0, r0
290 ; CHECK-NEXT: vmov.f16 r0, s0
291 ; CHECK-NEXT: vmov s0, r0
294 %0 = bitcast float %a.coerce to i32
295 %tmp.0.extract.trunc = trunc i32 %0 to i16
296 %1 = bitcast i16 %tmp.0.extract.trunc to half
297 %2 = tail call half @llvm.arm.mve.maxnmv.f16.v8f16(half %1, <8 x half> %b)
298 %3 = bitcast half %2 to i16
299 %tmp2.0.insert.ext = zext i16 %3 to i32
300 %4 = bitcast i32 %tmp2.0.insert.ext to float
; test_vmaxnmvq_f32: calls llvm.arm.mve.maxnmv.f32.v4f32 directly on the
; float scalar %a and <4 x float> vector %b.
; Expected asm: vmov s0 into r0, vmaxnmv.f32 on r0/q1, vmov r0 back to s0.
304 define arm_aapcs_vfpcc float @test_vmaxnmvq_f32(float %a, <4 x float> %b) {
305 ; CHECK-LABEL: test_vmaxnmvq_f32:
306 ; CHECK: @ %bb.0: @ %entry
307 ; CHECK-NEXT: vmov r0, s0
308 ; CHECK-NEXT: vmaxnmv.f32 r0, q1
309 ; CHECK-NEXT: vmov s0, r0
312 %0 = tail call float @llvm.arm.mve.maxnmv.f32.v4f32(float %a, <4 x float> %b)
; test_vmaxnmavq_f16: the half scalar travels inside a float (%a.coerce).
; The IR unpacks it (bitcast float->i32, trunc to i16, bitcast to half),
; calls llvm.arm.mve.maxnmav.f16.v8f16 with the <8 x half> vector %b, then
; repacks the half result (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: vmov s0 into r0, vmaxnmav.f16 on r0/q1, then three vmov
; instructions moving the value back through s0.
316 define arm_aapcs_vfpcc float @test_vmaxnmavq_f16(float %a.coerce, <8 x half> %b) {
317 ; CHECK-LABEL: test_vmaxnmavq_f16:
318 ; CHECK: @ %bb.0: @ %entry
319 ; CHECK-NEXT: vmov r0, s0
320 ; CHECK-NEXT: vmaxnmav.f16 r0, q1
321 ; CHECK-NEXT: vmov s0, r0
322 ; CHECK-NEXT: vmov.f16 r0, s0
323 ; CHECK-NEXT: vmov s0, r0
326 %0 = bitcast float %a.coerce to i32
327 %tmp.0.extract.trunc = trunc i32 %0 to i16
328 %1 = bitcast i16 %tmp.0.extract.trunc to half
329 %2 = tail call half @llvm.arm.mve.maxnmav.f16.v8f16(half %1, <8 x half> %b)
330 %3 = bitcast half %2 to i16
331 %tmp2.0.insert.ext = zext i16 %3 to i32
332 %4 = bitcast i32 %tmp2.0.insert.ext to float
; test_vmaxnmavq_f32: calls llvm.arm.mve.maxnmav.f32.v4f32 directly on the
; float scalar %a and <4 x float> vector %b.
; Expected asm: vmov s0 into r0, vmaxnmav.f32 on r0/q1, vmov r0 back to s0.
336 define arm_aapcs_vfpcc float @test_vmaxnmavq_f32(float %a, <4 x float> %b) {
337 ; CHECK-LABEL: test_vmaxnmavq_f32:
338 ; CHECK: @ %bb.0: @ %entry
339 ; CHECK-NEXT: vmov r0, s0
340 ; CHECK-NEXT: vmaxnmav.f32 r0, q1
341 ; CHECK-NEXT: vmov s0, r0
344 %0 = tail call float @llvm.arm.mve.maxnmav.f32.v4f32(float %a, <4 x float> %b)
; test_vminvq_p_s8: predicated form. The i16 mask %p is zero-extended to
; i32 and turned into <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; the scalar
; %a is zero-extended to i32; both go with %b and an i32 0 operand to
; llvm.arm.mve.minv.predicated.v16i8.v16i1, whose result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vminvt.s8 on r0/q0, then sxtb.
348 define arm_aapcs_vfpcc signext i8 @test_vminvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
349 ; CHECK-LABEL: test_vminvq_p_s8:
350 ; CHECK: @ %bb.0: @ %entry
351 ; CHECK-NEXT: vmsr p0, r1
353 ; CHECK-NEXT: vminvt.s8 r0, q0
354 ; CHECK-NEXT: sxtb r0, r0
357 %0 = zext i8 %a to i32
358 %1 = zext i16 %p to i32
359 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
360 %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
361 %4 = trunc i32 %3 to i8
; test_vminvq_p_s16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.minv.predicated.v8i16.v8i1 with an i32 0 operand,
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vminvt.s16 on r0/q0, then sxth.
365 define arm_aapcs_vfpcc signext i16 @test_vminvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
366 ; CHECK-LABEL: test_vminvq_p_s16:
367 ; CHECK: @ %bb.0: @ %entry
368 ; CHECK-NEXT: vmsr p0, r1
370 ; CHECK-NEXT: vminvt.s16 r0, q0
371 ; CHECK-NEXT: sxth r0, r0
374 %0 = zext i16 %a to i32
375 %1 = zext i16 %p to i32
376 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
377 %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
378 %4 = trunc i32 %3 to i16
; test_vminvq_p_s32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a, %b and an
; i32 0 operand to llvm.arm.mve.minv.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vminvt.s32 on r0/q0.
382 define arm_aapcs_vfpcc i32 @test_vminvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
383 ; CHECK-LABEL: test_vminvq_p_s32:
384 ; CHECK: @ %bb.0: @ %entry
385 ; CHECK-NEXT: vmsr p0, r1
387 ; CHECK-NEXT: vminvt.s32 r0, q0
390 %0 = zext i16 %p to i32
391 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
392 %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
; test_vminvq_p_u8: predicated form. %p is zero-extended and converted to
; <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.minv.predicated.v16i8.v16i1 with an i32 1 operand,
; and the result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vminvt.u8 on r0/q0, then uxtb.
396 define arm_aapcs_vfpcc zeroext i8 @test_vminvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
397 ; CHECK-LABEL: test_vminvq_p_u8:
398 ; CHECK: @ %bb.0: @ %entry
399 ; CHECK-NEXT: vmsr p0, r1
401 ; CHECK-NEXT: vminvt.u8 r0, q0
402 ; CHECK-NEXT: uxtb r0, r0
405 %0 = zext i8 %a to i32
406 %1 = zext i16 %p to i32
407 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
408 %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
409 %4 = trunc i32 %3 to i8
; test_vminvq_p_u16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.minv.predicated.v8i16.v8i1 with an i32 1 operand,
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vminvt.u16 on r0/q0, then uxth.
413 define arm_aapcs_vfpcc zeroext i16 @test_vminvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
414 ; CHECK-LABEL: test_vminvq_p_u16:
415 ; CHECK: @ %bb.0: @ %entry
416 ; CHECK-NEXT: vmsr p0, r1
418 ; CHECK-NEXT: vminvt.u16 r0, q0
419 ; CHECK-NEXT: uxth r0, r0
422 %0 = zext i16 %a to i32
423 %1 = zext i16 %p to i32
424 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
425 %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
426 %4 = trunc i32 %3 to i16
; test_vminvq_p_u32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a, %b and an
; i32 1 operand to llvm.arm.mve.minv.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vminvt.u32 on r0/q0.
430 define arm_aapcs_vfpcc i32 @test_vminvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
431 ; CHECK-LABEL: test_vminvq_p_u32:
432 ; CHECK: @ %bb.0: @ %entry
433 ; CHECK-NEXT: vmsr p0, r1
435 ; CHECK-NEXT: vminvt.u32 r0, q0
438 %0 = zext i16 %p to i32
439 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
440 %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
; test_vmaxvq_p_s8: predicated form. %p is zero-extended and converted to
; <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxv.predicated.v16i8.v16i1 with an i32 0 operand,
; and the result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vmaxvt.s8 on r0/q0, then sxtb.
444 define arm_aapcs_vfpcc signext i8 @test_vmaxvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
445 ; CHECK-LABEL: test_vmaxvq_p_s8:
446 ; CHECK: @ %bb.0: @ %entry
447 ; CHECK-NEXT: vmsr p0, r1
449 ; CHECK-NEXT: vmaxvt.s8 r0, q0
450 ; CHECK-NEXT: sxtb r0, r0
453 %0 = zext i8 %a to i32
454 %1 = zext i16 %p to i32
455 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
456 %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
457 %4 = trunc i32 %3 to i8
; test_vmaxvq_p_s16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxv.predicated.v8i16.v8i1 with an i32 0 operand,
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vmaxvt.s16 on r0/q0, then sxth.
461 define arm_aapcs_vfpcc signext i16 @test_vmaxvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
462 ; CHECK-LABEL: test_vmaxvq_p_s16:
463 ; CHECK: @ %bb.0: @ %entry
464 ; CHECK-NEXT: vmsr p0, r1
466 ; CHECK-NEXT: vmaxvt.s16 r0, q0
467 ; CHECK-NEXT: sxth r0, r0
470 %0 = zext i16 %a to i32
471 %1 = zext i16 %p to i32
472 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
473 %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
474 %4 = trunc i32 %3 to i16
; test_vmaxvq_p_s32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a, %b and an
; i32 0 operand to llvm.arm.mve.maxv.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vmaxvt.s32 on r0/q0.
478 define arm_aapcs_vfpcc i32 @test_vmaxvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
479 ; CHECK-LABEL: test_vmaxvq_p_s32:
480 ; CHECK: @ %bb.0: @ %entry
481 ; CHECK-NEXT: vmsr p0, r1
483 ; CHECK-NEXT: vmaxvt.s32 r0, q0
486 %0 = zext i16 %p to i32
487 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
488 %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
; test_vmaxvq_p_u8: predicated form. %p is zero-extended and converted to
; <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxv.predicated.v16i8.v16i1 with an i32 1 operand,
; and the result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vmaxvt.u8 on r0/q0, then uxtb.
492 define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
493 ; CHECK-LABEL: test_vmaxvq_p_u8:
494 ; CHECK: @ %bb.0: @ %entry
495 ; CHECK-NEXT: vmsr p0, r1
497 ; CHECK-NEXT: vmaxvt.u8 r0, q0
498 ; CHECK-NEXT: uxtb r0, r0
501 %0 = zext i8 %a to i32
502 %1 = zext i16 %p to i32
503 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
504 %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
505 %4 = trunc i32 %3 to i8
; test_vmaxvq_p_u16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxv.predicated.v8i16.v8i1 with an i32 1 operand,
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vmaxvt.u16 on r0/q0, then uxth.
509 define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
510 ; CHECK-LABEL: test_vmaxvq_p_u16:
511 ; CHECK: @ %bb.0: @ %entry
512 ; CHECK-NEXT: vmsr p0, r1
514 ; CHECK-NEXT: vmaxvt.u16 r0, q0
515 ; CHECK-NEXT: uxth r0, r0
518 %0 = zext i16 %a to i32
519 %1 = zext i16 %p to i32
520 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
521 %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
522 %4 = trunc i32 %3 to i16
; test_vmaxvq_p_u32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a, %b and an
; i32 1 operand to llvm.arm.mve.maxv.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vmaxvt.u32 on r0/q0.
526 define arm_aapcs_vfpcc i32 @test_vmaxvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
527 ; CHECK-LABEL: test_vmaxvq_p_u32:
528 ; CHECK: @ %bb.0: @ %entry
529 ; CHECK-NEXT: vmsr p0, r1
531 ; CHECK-NEXT: vmaxvt.u32 r0, q0
534 %0 = zext i16 %p to i32
535 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
536 %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
; test_vminavq_p_s8: predicated form. %p is zero-extended and converted to
; <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.minav.predicated.v16i8.v16i1 (scalar, vector, mask),
; and the result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vminavt.s8 on r0/q0, then uxtb.
540 define arm_aapcs_vfpcc zeroext i8 @test_vminavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
541 ; CHECK-LABEL: test_vminavq_p_s8:
542 ; CHECK: @ %bb.0: @ %entry
543 ; CHECK-NEXT: vmsr p0, r1
545 ; CHECK-NEXT: vminavt.s8 r0, q0
546 ; CHECK-NEXT: uxtb r0, r0
549 %0 = zext i8 %a to i32
550 %1 = zext i16 %p to i32
551 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
552 %3 = tail call i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
553 %4 = trunc i32 %3 to i8
; test_vminavq_p_s16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.minav.predicated.v8i16.v8i1 (scalar, vector, mask),
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vminavt.s16 on r0/q0, then uxth.
557 define arm_aapcs_vfpcc zeroext i16 @test_vminavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
558 ; CHECK-LABEL: test_vminavq_p_s16:
559 ; CHECK: @ %bb.0: @ %entry
560 ; CHECK-NEXT: vmsr p0, r1
562 ; CHECK-NEXT: vminavt.s16 r0, q0
563 ; CHECK-NEXT: uxth r0, r0
566 %0 = zext i16 %a to i32
567 %1 = zext i16 %p to i32
568 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
569 %3 = tail call i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
570 %4 = trunc i32 %3 to i16
; test_vminavq_p_s32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.minav.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vminavt.s32 on r0/q0.
574 define arm_aapcs_vfpcc i32 @test_vminavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
575 ; CHECK-LABEL: test_vminavq_p_s32:
576 ; CHECK: @ %bb.0: @ %entry
577 ; CHECK-NEXT: vmsr p0, r1
579 ; CHECK-NEXT: vminavt.s32 r0, q0
582 %0 = zext i16 %p to i32
583 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
584 %2 = tail call i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
; test_vmaxavq_p_s8: predicated form. %p is zero-extended and converted to
; <16 x i1> by llvm.arm.mve.pred.i2v.v16i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxav.predicated.v16i8.v16i1 (scalar, vector, mask),
; and the result is truncated to i8.
; Expected asm: vmsr of r1 into p0, vmaxavt.s8 on r0/q0, then uxtb.
588 define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
589 ; CHECK-LABEL: test_vmaxavq_p_s8:
590 ; CHECK: @ %bb.0: @ %entry
591 ; CHECK-NEXT: vmsr p0, r1
593 ; CHECK-NEXT: vmaxavt.s8 r0, q0
594 ; CHECK-NEXT: uxtb r0, r0
597 %0 = zext i8 %a to i32
598 %1 = zext i16 %p to i32
599 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
600 %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
601 %4 = trunc i32 %3 to i8
; test_vmaxavq_p_s16: predicated form. %p is zero-extended and converted to
; <8 x i1> by llvm.arm.mve.pred.i2v.v8i1; %a is zero-extended to i32; the
; call is llvm.arm.mve.maxav.predicated.v8i16.v8i1 (scalar, vector, mask),
; and the result is truncated to i16.
; Expected asm: vmsr of r1 into p0, vmaxavt.s16 on r0/q0, then uxth.
605 define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
606 ; CHECK-LABEL: test_vmaxavq_p_s16:
607 ; CHECK: @ %bb.0: @ %entry
608 ; CHECK-NEXT: vmsr p0, r1
610 ; CHECK-NEXT: vmaxavt.s16 r0, q0
611 ; CHECK-NEXT: uxth r0, r0
614 %0 = zext i16 %a to i32
615 %1 = zext i16 %p to i32
616 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
617 %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
618 %4 = trunc i32 %3 to i16
; test_vmaxavq_p_s32: predicated form. %p is zero-extended and converted to
; <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.maxav.predicated.v4i32.v4i1.
; Expected asm: vmsr of r1 into p0, then vmaxavt.s32 on r0/q0.
622 define arm_aapcs_vfpcc i32 @test_vmaxavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
623 ; CHECK-LABEL: test_vmaxavq_p_s32:
624 ; CHECK: @ %bb.0: @ %entry
625 ; CHECK-NEXT: vmsr p0, r1
627 ; CHECK-NEXT: vmaxavt.s32 r0, q0
630 %0 = zext i16 %p to i32
631 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
632 %2 = tail call i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
; test_vminnmvq_p_f16: predicated form with the half scalar carried inside
; a float (%a.coerce). The IR unpacks the half (bitcast float->i32, trunc
; to i16, bitcast to half), builds the <8 x i1> mask from %p via zext and
; llvm.arm.mve.pred.i2v.v8i1, calls
; llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1, and repacks the result
; (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: the scalar goes to r1 and the mask comes from r0 via vmsr;
; vminnmvt.f16 works on r1/q1, followed by three vmov instructions.
636 define arm_aapcs_vfpcc float @test_vminnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
637 ; CHECK-LABEL: test_vminnmvq_p_f16:
638 ; CHECK: @ %bb.0: @ %entry
639 ; CHECK-NEXT: vmov r1, s0
640 ; CHECK-NEXT: vmsr p0, r0
642 ; CHECK-NEXT: vminnmvt.f16 r1, q1
643 ; CHECK-NEXT: vmov s0, r1
644 ; CHECK-NEXT: vmov.f16 r0, s0
645 ; CHECK-NEXT: vmov s0, r0
648 %0 = bitcast float %a.coerce to i32
649 %tmp.0.extract.trunc = trunc i32 %0 to i16
650 %1 = bitcast i16 %tmp.0.extract.trunc to half
651 %2 = zext i16 %p to i32
652 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
653 %4 = tail call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
654 %5 = bitcast half %4 to i16
655 %tmp2.0.insert.ext = zext i16 %5 to i32
656 %6 = bitcast i32 %tmp2.0.insert.ext to float
; test_vminnmvq_p_f32: predicated form. %p is zero-extended and converted
; to <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1.
; Expected asm: vmsr of r0 into p0 first, then vmov s0 into r0,
; vminnmvt.f32 on r0/q1, and vmov r0 back to s0.
660 define arm_aapcs_vfpcc float @test_vminnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
661 ; CHECK-LABEL: test_vminnmvq_p_f32:
662 ; CHECK: @ %bb.0: @ %entry
663 ; CHECK-NEXT: vmsr p0, r0
664 ; CHECK-NEXT: vmov r0, s0
666 ; CHECK-NEXT: vminnmvt.f32 r0, q1
667 ; CHECK-NEXT: vmov s0, r0
670 %0 = zext i16 %p to i32
671 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
672 %2 = tail call float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
; test_vminnmavq_p_f16: predicated form with the half scalar carried inside
; a float (%a.coerce). The IR unpacks the half (bitcast float->i32, trunc
; to i16, bitcast to half), builds the <8 x i1> mask from %p via zext and
; llvm.arm.mve.pred.i2v.v8i1, calls
; llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1, and repacks the result
; (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: the scalar goes to r1 and the mask comes from r0 via vmsr;
; vminnmavt.f16 works on r1/q1, followed by three vmov instructions.
676 define arm_aapcs_vfpcc float @test_vminnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
677 ; CHECK-LABEL: test_vminnmavq_p_f16:
678 ; CHECK: @ %bb.0: @ %entry
679 ; CHECK-NEXT: vmov r1, s0
680 ; CHECK-NEXT: vmsr p0, r0
682 ; CHECK-NEXT: vminnmavt.f16 r1, q1
683 ; CHECK-NEXT: vmov s0, r1
684 ; CHECK-NEXT: vmov.f16 r0, s0
685 ; CHECK-NEXT: vmov s0, r0
688 %0 = bitcast float %a.coerce to i32
689 %tmp.0.extract.trunc = trunc i32 %0 to i16
690 %1 = bitcast i16 %tmp.0.extract.trunc to half
691 %2 = zext i16 %p to i32
692 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
693 %4 = tail call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
694 %5 = bitcast half %4 to i16
695 %tmp2.0.insert.ext = zext i16 %5 to i32
696 %6 = bitcast i32 %tmp2.0.insert.ext to float
; test_vminnmavq_p_f32: predicated form. %p is zero-extended and converted
; to <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1.
; Expected asm: vmsr of r0 into p0 first, then vmov s0 into r0,
; vminnmavt.f32 on r0/q1, and vmov r0 back to s0.
700 define arm_aapcs_vfpcc float @test_vminnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
701 ; CHECK-LABEL: test_vminnmavq_p_f32:
702 ; CHECK: @ %bb.0: @ %entry
703 ; CHECK-NEXT: vmsr p0, r0
704 ; CHECK-NEXT: vmov r0, s0
706 ; CHECK-NEXT: vminnmavt.f32 r0, q1
707 ; CHECK-NEXT: vmov s0, r0
710 %0 = zext i16 %p to i32
711 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
712 %2 = tail call float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
; test_vmaxnmvq_p_f16: predicated form with the half scalar carried inside
; a float (%a.coerce). The IR unpacks the half (bitcast float->i32, trunc
; to i16, bitcast to half), builds the <8 x i1> mask from %p via zext and
; llvm.arm.mve.pred.i2v.v8i1, calls
; llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1, and repacks the result
; (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: the scalar goes to r1 and the mask comes from r0 via vmsr;
; vmaxnmvt.f16 works on r1/q1, followed by three vmov instructions.
716 define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
717 ; CHECK-LABEL: test_vmaxnmvq_p_f16:
718 ; CHECK: @ %bb.0: @ %entry
719 ; CHECK-NEXT: vmov r1, s0
720 ; CHECK-NEXT: vmsr p0, r0
722 ; CHECK-NEXT: vmaxnmvt.f16 r1, q1
723 ; CHECK-NEXT: vmov s0, r1
724 ; CHECK-NEXT: vmov.f16 r0, s0
725 ; CHECK-NEXT: vmov s0, r0
728 %0 = bitcast float %a.coerce to i32
729 %tmp.0.extract.trunc = trunc i32 %0 to i16
730 %1 = bitcast i16 %tmp.0.extract.trunc to half
731 %2 = zext i16 %p to i32
732 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
733 %4 = tail call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
734 %5 = bitcast half %4 to i16
735 %tmp2.0.insert.ext = zext i16 %5 to i32
736 %6 = bitcast i32 %tmp2.0.insert.ext to float
; test_vmaxnmvq_p_f32: predicated form. %p is zero-extended and converted
; to <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1.
; Expected asm: vmsr of r0 into p0 first, then vmov s0 into r0,
; vmaxnmvt.f32 on r0/q1, and vmov r0 back to s0.
740 define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
741 ; CHECK-LABEL: test_vmaxnmvq_p_f32:
742 ; CHECK: @ %bb.0: @ %entry
743 ; CHECK-NEXT: vmsr p0, r0
744 ; CHECK-NEXT: vmov r0, s0
746 ; CHECK-NEXT: vmaxnmvt.f32 r0, q1
747 ; CHECK-NEXT: vmov s0, r0
750 %0 = zext i16 %p to i32
751 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
752 %2 = tail call float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
; test_vmaxnmavq_p_f16: predicated form with the half scalar carried inside
; a float (%a.coerce). The IR unpacks the half (bitcast float->i32, trunc
; to i16, bitcast to half), builds the <8 x i1> mask from %p via zext and
; llvm.arm.mve.pred.i2v.v8i1, calls
; llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1, and repacks the result
; (bitcast to i16, zext to i32, bitcast to float).
; Expected asm: the scalar goes to r1 and the mask comes from r0 via vmsr;
; vmaxnmavt.f16 works on r1/q1, followed by three vmov instructions.
756 define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
757 ; CHECK-LABEL: test_vmaxnmavq_p_f16:
758 ; CHECK: @ %bb.0: @ %entry
759 ; CHECK-NEXT: vmov r1, s0
760 ; CHECK-NEXT: vmsr p0, r0
762 ; CHECK-NEXT: vmaxnmavt.f16 r1, q1
763 ; CHECK-NEXT: vmov s0, r1
764 ; CHECK-NEXT: vmov.f16 r0, s0
765 ; CHECK-NEXT: vmov s0, r0
768 %0 = bitcast float %a.coerce to i32
769 %tmp.0.extract.trunc = trunc i32 %0 to i16
770 %1 = bitcast i16 %tmp.0.extract.trunc to half
771 %2 = zext i16 %p to i32
772 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
773 %4 = tail call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
774 %5 = bitcast half %4 to i16
775 %tmp2.0.insert.ext = zext i16 %5 to i32
776 %6 = bitcast i32 %tmp2.0.insert.ext to float
; test_vmaxnmavq_p_f32: predicated form. %p is zero-extended and converted
; to <4 x i1> by llvm.arm.mve.pred.i2v.v4i1, then passed with %a and %b to
; llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1.
; Expected asm: vmsr of r0 into p0 first, then vmov s0 into r0,
; vmaxnmavt.f32 on r0/q1, and vmov r0 back to s0.
780 define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
781 ; CHECK-LABEL: test_vmaxnmavq_p_f32:
782 ; CHECK: @ %bb.0: @ %entry
783 ; CHECK-NEXT: vmsr p0, r0
784 ; CHECK-NEXT: vmov r0, s0
786 ; CHECK-NEXT: vmaxnmavt.f32 r0, q1
787 ; CHECK-NEXT: vmov s0, r0
790 %0 = zext i16 %p to i32
791 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
792 %2 = tail call float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
; Mask-conversion intrinsics used by the predicated tests: each takes an i32
; and returns an i1 vector with 16, 8 or 4 lanes.
796 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
797 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
798 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
; Unpredicated integer reductions. minv/maxv take (i32 scalar, vector, i32)
; where the tests pass 0 or 1 as the last operand; minav/maxav take only
; (i32 scalar, vector). All return i32.
800 declare i32 @llvm.arm.mve.minv.v16i8(i32, <16 x i8>, i32)
801 declare i32 @llvm.arm.mve.minv.v8i16(i32, <8 x i16>, i32)
802 declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32)
803 declare i32 @llvm.arm.mve.maxv.v16i8(i32, <16 x i8>, i32)
804 declare i32 @llvm.arm.mve.maxv.v8i16(i32, <8 x i16>, i32)
805 declare i32 @llvm.arm.mve.maxv.v4i32(i32, <4 x i32>, i32)
806 declare i32 @llvm.arm.mve.minav.v16i8(i32, <16 x i8>)
807 declare i32 @llvm.arm.mve.minav.v8i16(i32, <8 x i16>)
808 declare i32 @llvm.arm.mve.minav.v4i32(i32, <4 x i32>)
809 declare i32 @llvm.arm.mve.maxav.v16i8(i32, <16 x i8>)
810 declare i32 @llvm.arm.mve.maxav.v8i16(i32, <8 x i16>)
811 declare i32 @llvm.arm.mve.maxav.v4i32(i32, <4 x i32>)
; Predicated integer reductions: the same operand lists as the unpredicated
; forms with an extra trailing i1-vector mask whose lane count matches the
; data vector. All return i32.
812 declare i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
813 declare i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
814 declare i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
815 declare i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
816 declare i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
817 declare i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
818 declare i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
819 declare i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
820 declare i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
821 declare i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
822 declare i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
823 declare i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
; Half-precision reductions: (half scalar, <8 x half>) returning half, plus
; predicated variants that add a trailing <8 x i1> mask.
825 declare half @llvm.arm.mve.minnmv.f16.v8f16(half, <8 x half>)
826 declare half @llvm.arm.mve.minnmav.f16.v8f16(half, <8 x half>)
827 declare half @llvm.arm.mve.maxnmv.f16.v8f16(half, <8 x half>)
828 declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>)
829 declare half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
830 declare half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
831 declare half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
832 declare half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
; Single-precision reductions: (float scalar, <4 x float>) returning float,
; plus predicated variants that add a trailing <4 x i1> mask.
834 declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>)
835 declare float @llvm.arm.mve.minnmav.f32.v4f32(float, <4 x float>)
836 declare float @llvm.arm.mve.maxnmv.f32.v4f32(float, <4 x float>)
837 declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>)
838 declare float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
839 declare float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
840 declare float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
841 declare float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)