1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; concat_v2i1: concatenate two <2 x i1> predicates into one <4 x i1>.
; %a and %b are each turned into a <2 x i1> value by the
; llvm.arm.mve.pred.i2v.v2i1 intrinsic, joined with a shufflevector, and the
; resulting <4 x i1> selects per lane between %c and zero.
; The expected assembly below expands each predicate into a vector with
; vmsr p0 + vpsel, interleaves lanes of the two vectors through r0/r1 into q2,
; and re-derives the combined predicate with vcmp.i32 ne before the final vpsel.
4 define arm_aapcs_vfpcc <4 x i32> @concat_v2i1(i32 %a, i32 %b, <4 x i32> %c) {
5 ; CHECK-LABEL: concat_v2i1:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmsr p0, r1
8 ; CHECK-NEXT: vmov.i8 q1, #0x0
9 ; CHECK-NEXT: vmov.i8 q2, #0xff
10 ; CHECK-NEXT: vpsel q3, q2, q1
11 ; CHECK-NEXT: vmsr p0, r0
12 ; CHECK-NEXT: vpsel q1, q2, q1
13 ; CHECK-NEXT: vmov r1, s12
14 ; CHECK-NEXT: vmov r0, s4
15 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
16 ; CHECK-NEXT: vmov r1, s6
17 ; CHECK-NEXT: vmov r0, s14
18 ; CHECK-NEXT: vmov.i32 q1, #0x0
19 ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
20 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
21 ; CHECK-NEXT: vpsel q0, q0, q1
24 %ai = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %a)
25 %bi = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %b)
; Identity mask over both operands: result lanes 0-1 come from %ai, lanes 2-3 from %bi.
26 %s = shufflevector <2 x i1> %ai, <2 x i1> %bi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lanes whose predicate bit is false yield 0 instead of the %c element.
27 %ci = select <4 x i1> %s, <4 x i32> %c, <4 x i32> zeroinitializer
; Intrinsic used by concat_v2i1 to obtain a <2 x i1> value from an i32
; (presumably the MVE integer-to-predicate-vector conversion; confirm against the intrinsic definition).
31 declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
; concat_v4i1: concatenate two <4 x i1> predicates into one <8 x i1>.
; Each input predicate is the result of a signed "less than zero" compare on a
; <4 x i32> argument; the concatenated <8 x i1> selects per lane between the
; <8 x i16> %c and zero.
; The expected assembly below expands each compare result to a vector with
; vpsel, writes the two halves to a 16-byte stack slot with narrowing
; vstrh.32 stores (at [r0] and [r0, #8]), reloads the slot as one vector, and
; re-derives the 8-lane predicate with vcmp.i16 ne.
34 define arm_aapcs_vfpcc <8 x i16> @concat_v4i1(<4 x i32> %a, <4 x i32> %b, <8 x i16> %c) {
35 ; CHECK-LABEL: concat_v4i1:
36 ; CHECK: @ %bb.0: @ %entry
37 ; CHECK-NEXT: .vsave {d8, d9}
38 ; CHECK-NEXT: vpush {d8, d9}
39 ; CHECK-NEXT: .pad #16
40 ; CHECK-NEXT: sub sp, #16
41 ; CHECK-NEXT: vmov.i8 q3, #0x0
42 ; CHECK-NEXT: vmov.i8 q4, #0xff
43 ; CHECK-NEXT: vcmp.s32 lt, q1, zr
44 ; CHECK-NEXT: mov r0, sp
45 ; CHECK-NEXT: vpsel q1, q4, q3
46 ; CHECK-NEXT: vcmp.s32 lt, q0, zr
47 ; CHECK-NEXT: vpsel q0, q4, q3
48 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
49 ; CHECK-NEXT: vstrh.32 q0, [r0]
50 ; CHECK-NEXT: vmov.i32 q0, #0x0
51 ; CHECK-NEXT: vldrw.u32 q1, [r0]
52 ; CHECK-NEXT: vcmp.i16 ne, q1, zr
53 ; CHECK-NEXT: vpsel q0, q2, q0
54 ; CHECK-NEXT: add sp, #16
55 ; CHECK-NEXT: vpop {d8, d9}
58 %ai = icmp slt <4 x i32> %a, zeroinitializer
59 %bi = icmp slt <4 x i32> %b, zeroinitializer
; Identity mask over both operands: result lanes 0-3 come from %ai, lanes 4-7 from %bi.
60 %s = shufflevector <4 x i1> %ai, <4 x i1> %bi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Lanes whose predicate bit is false yield 0 instead of the %c element.
61 %ci = select <8 x i1> %s, <8 x i16> %c, <8 x i16> zeroinitializer
; concat_v8i1: concatenate two <8 x i1> predicates into one <16 x i1>.
; Each input predicate is the result of a signed "less than zero" compare on an
; <8 x i16> argument; the concatenated <16 x i1> selects per lane between the
; <16 x i8> %c and zero.
; The expected assembly below mirrors the stack round trip of concat_v4i1 one
; element size down: narrowing vstrb.16 stores into a 16-byte stack slot,
; a full-vector reload, and vcmp.i8 ne to re-derive the 16-lane predicate.
65 define arm_aapcs_vfpcc <16 x i8> @concat_v8i1(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c) {
66 ; CHECK-LABEL: concat_v8i1:
67 ; CHECK: @ %bb.0: @ %entry
68 ; CHECK-NEXT: .vsave {d8, d9}
69 ; CHECK-NEXT: vpush {d8, d9}
70 ; CHECK-NEXT: .pad #16
71 ; CHECK-NEXT: sub sp, #16
72 ; CHECK-NEXT: vmov.i8 q3, #0x0
73 ; CHECK-NEXT: vmov.i8 q4, #0xff
74 ; CHECK-NEXT: vcmp.s16 lt, q1, zr
75 ; CHECK-NEXT: mov r0, sp
76 ; CHECK-NEXT: vpsel q1, q4, q3
77 ; CHECK-NEXT: vcmp.s16 lt, q0, zr
78 ; CHECK-NEXT: vpsel q0, q4, q3
79 ; CHECK-NEXT: vstrb.16 q1, [r0, #8]
80 ; CHECK-NEXT: vstrb.16 q0, [r0]
81 ; CHECK-NEXT: vmov.i32 q0, #0x0
82 ; CHECK-NEXT: vldrw.u32 q1, [r0]
83 ; CHECK-NEXT: vcmp.i8 ne, q1, zr
84 ; CHECK-NEXT: vpsel q0, q2, q0
85 ; CHECK-NEXT: add sp, #16
86 ; CHECK-NEXT: vpop {d8, d9}
89 %ai = icmp slt <8 x i16> %a, zeroinitializer
90 %bi = icmp slt <8 x i16> %b, zeroinitializer
; Identity mask over both operands: result lanes 0-7 come from %ai, lanes 8-15 from %bi.
91 %s = shufflevector <8 x i1> %ai, <8 x i1> %bi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Lanes whose predicate bit is false yield 0 instead of the %c element.
92 %ci = select <16 x i1> %s, <16 x i8> %c, <16 x i8> zeroinitializer
; concat_v48i1: two-level predicate concatenation, 4 x <4 x i1> -> 2 x <8 x i1> -> <16 x i1>.
; %a, %b, %d and %e are each compared signed-less-than against zero. The pairs
; (%a, %b) and (%d, %e) are concatenated into <8 x i1> values, which are then
; concatenated into the <16 x i1> that selects between the <16 x i8> %c and zero.
; The expected assembly below reserves 48 bytes of stack and performs the
; concatenation in two store/reload rounds: vstrh.32 stores followed by
; vcmp.i16 ne for the first level, then vstrb.16 stores followed by vcmp.i8 ne
; for the second level.
; NOTE(review): %c is loaded from [sp, #80], presumably because the fifth vector
; argument is passed on the stack under arm_aapcs_vfpcc; confirm against the ABI.
97 define arm_aapcs_vfpcc <16 x i8> @concat_v48i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d, <4 x i32> %e, <16 x i8> %c) {
98 ; CHECK-LABEL: concat_v48i1:
99 ; CHECK: @ %bb.0: @ %entry
100 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
101 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
102 ; CHECK-NEXT: .pad #48
103 ; CHECK-NEXT: sub sp, #48
104 ; CHECK-NEXT: vmov.i8 q4, #0x0
105 ; CHECK-NEXT: vmov.i8 q5, #0xff
106 ; CHECK-NEXT: vcmp.s32 lt, q3, zr
107 ; CHECK-NEXT: add r0, sp, #16
108 ; CHECK-NEXT: vpsel q3, q5, q4
109 ; CHECK-NEXT: vcmp.s32 lt, q2, zr
110 ; CHECK-NEXT: vpsel q2, q5, q4
111 ; CHECK-NEXT: vcmp.s32 lt, q1, zr
112 ; CHECK-NEXT: vpsel q1, q5, q4
113 ; CHECK-NEXT: vcmp.s32 lt, q0, zr
114 ; CHECK-NEXT: mov r1, sp
115 ; CHECK-NEXT: vpsel q0, q5, q4
116 ; CHECK-NEXT: vstrh.32 q3, [r0, #8]
117 ; CHECK-NEXT: vstrh.32 q2, [r0]
118 ; CHECK-NEXT: vstrh.32 q1, [r1, #8]
119 ; CHECK-NEXT: vstrh.32 q0, [r1]
120 ; CHECK-NEXT: vldrw.u32 q0, [r0]
121 ; CHECK-NEXT: add r0, sp, #32
122 ; CHECK-NEXT: vcmp.i16 ne, q0, zr
123 ; CHECK-NEXT: vpsel q0, q5, q4
124 ; CHECK-NEXT: vstrb.16 q0, [r0, #8]
125 ; CHECK-NEXT: vldrw.u32 q0, [r1]
126 ; CHECK-NEXT: add r1, sp, #80
127 ; CHECK-NEXT: vldrw.u32 q1, [r1]
128 ; CHECK-NEXT: vcmp.i16 ne, q0, zr
129 ; CHECK-NEXT: vpsel q0, q5, q4
130 ; CHECK-NEXT: vstrb.16 q0, [r0]
131 ; CHECK-NEXT: vmov.i32 q0, #0x0
132 ; CHECK-NEXT: vldrw.u32 q2, [r0]
133 ; CHECK-NEXT: vcmp.i8 ne, q2, zr
134 ; CHECK-NEXT: vpsel q0, q1, q0
135 ; CHECK-NEXT: add sp, #48
136 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
139 %ai = icmp slt <4 x i32> %a, zeroinitializer
140 %bi = icmp slt <4 x i32> %b, zeroinitializer
141 %di = icmp slt <4 x i32> %d, zeroinitializer
142 %ei = icmp slt <4 x i32> %e, zeroinitializer
; First level: %s1 = %ai followed by %bi, %s2 = %di followed by %ei (identity masks).
143 %s1 = shufflevector <4 x i1> %ai, <4 x i1> %bi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
144 %s2 = shufflevector <4 x i1> %di, <4 x i1> %ei, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Second level: result lanes 0-7 come from %s1, lanes 8-15 from %s2.
145 %s = shufflevector <8 x i1> %s1, <8 x i1> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Lanes whose predicate bit is false yield 0 instead of the %c element.
146 %ci = select <16 x i1> %s, <16 x i8> %c, <16 x i8> zeroinitializer