1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE
; External callees, one per vector element type. Each shuffle1_* function in
; this file calls the matching declaration between two selects that share the
; same compare result.
5 declare arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %c)
6 declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
7 declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
8 declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
; <2 x i64> case. The lane mask %c = (%src == 0) is used by a select before the
; call to @ext_i64 and by another select after it, so the mask is live across
; the call. For both the little-endian (CHECK-LE) and big-endian (CHECK-BE)
; runs the expected code stores p0 to the stack (vstr p0) before the bl and
; loads it back (vldr p0) afterwards. For this element type the expected code
; builds the predicate from scalar orrs/csetm/bfi plus vmsr instead of a vcmp.
10 define arm_aapcs_vfpcc <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a) {
11 ; CHECK-LE-LABEL: shuffle1_v2i64:
12 ; CHECK-LE: @ %bb.0: @ %entry
13 ; CHECK-LE-NEXT: .save {r7, lr}
14 ; CHECK-LE-NEXT: push {r7, lr}
15 ; CHECK-LE-NEXT: .vsave {d8, d9}
16 ; CHECK-LE-NEXT: vpush {d8, d9}
17 ; CHECK-LE-NEXT: .pad #8
18 ; CHECK-LE-NEXT: sub sp, #8
19 ; CHECK-LE-NEXT: vmov r0, r1, d0
20 ; CHECK-LE-NEXT: vmov q4, q1
21 ; CHECK-LE-NEXT: orrs r0, r1
22 ; CHECK-LE-NEXT: mov.w r1, #0
23 ; CHECK-LE-NEXT: csetm r0, eq
24 ; CHECK-LE-NEXT: bfi r1, r0, #0, #8
25 ; CHECK-LE-NEXT: vmov r0, r2, d1
26 ; CHECK-LE-NEXT: vmov.i32 q0, #0x0
27 ; CHECK-LE-NEXT: orrs r0, r2
28 ; CHECK-LE-NEXT: csetm r0, eq
29 ; CHECK-LE-NEXT: bfi r1, r0, #8, #8
30 ; CHECK-LE-NEXT: vmsr p0, r1
31 ; CHECK-LE-NEXT: vpsel q0, q1, q0
32 ; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
33 ; CHECK-LE-NEXT: bl ext_i64
34 ; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
35 ; CHECK-LE-NEXT: vpsel q0, q4, q0
36 ; CHECK-LE-NEXT: add sp, #8
37 ; CHECK-LE-NEXT: vpop {d8, d9}
38 ; CHECK-LE-NEXT: pop {r7, pc}
40 ; CHECK-BE-LABEL: shuffle1_v2i64:
41 ; CHECK-BE: @ %bb.0: @ %entry
42 ; CHECK-BE-NEXT: .save {r7, lr}
43 ; CHECK-BE-NEXT: push {r7, lr}
44 ; CHECK-BE-NEXT: .vsave {d8, d9}
45 ; CHECK-BE-NEXT: vpush {d8, d9}
46 ; CHECK-BE-NEXT: .pad #8
47 ; CHECK-BE-NEXT: sub sp, #8
48 ; CHECK-BE-NEXT: vmov q4, q1
49 ; CHECK-BE-NEXT: vrev64.32 q1, q0
50 ; CHECK-BE-NEXT: vmov r0, r1, d2
51 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
52 ; CHECK-BE-NEXT: orrs r0, r1
53 ; CHECK-BE-NEXT: mov.w r1, #0
54 ; CHECK-BE-NEXT: csetm r0, eq
55 ; CHECK-BE-NEXT: bfi r1, r0, #0, #8
56 ; CHECK-BE-NEXT: vmov r0, r2, d3
57 ; CHECK-BE-NEXT: orrs r0, r2
58 ; CHECK-BE-NEXT: csetm r0, eq
59 ; CHECK-BE-NEXT: bfi r1, r0, #8, #8
60 ; CHECK-BE-NEXT: vmsr p0, r1
61 ; CHECK-BE-NEXT: vpsel q0, q4, q0
62 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
63 ; CHECK-BE-NEXT: bl ext_i64
64 ; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
65 ; CHECK-BE-NEXT: vpsel q0, q4, q0
66 ; CHECK-BE-NEXT: add sp, #8
67 ; CHECK-BE-NEXT: vpop {d8, d9}
68 ; CHECK-BE-NEXT: pop {r7, pc}
; %c is consumed by both selects, one on each side of the call.
70 %c = icmp eq <2 x i64> %src, zeroinitializer
71 %s1 = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer
72 %ext = call arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %s1)
73 %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %ext
; <4 x i32> case. The lane mask %c = (%src == 0) is used by a select before the
; call to @ext_i32 and by another select after it, so the mask is live across
; the call. Both runs expect the predicate produced by vcmp.i32 to be stored to
; the stack (vstr p0) before the bl and loaded back (vldr p0) afterwards. The
; big-endian (CHECK-BE) expectations additionally contain vrev64.32 on the
; incoming arguments, on the value passed to and returned from the call, and on
; the final result.
77 define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
78 ; CHECK-LE-LABEL: shuffle1_v4i32:
79 ; CHECK-LE: @ %bb.0: @ %entry
80 ; CHECK-LE-NEXT: .save {r7, lr}
81 ; CHECK-LE-NEXT: push {r7, lr}
82 ; CHECK-LE-NEXT: .vsave {d8, d9}
83 ; CHECK-LE-NEXT: vpush {d8, d9}
84 ; CHECK-LE-NEXT: .pad #8
85 ; CHECK-LE-NEXT: sub sp, #8
86 ; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
87 ; CHECK-LE-NEXT: vmov.i32 q0, #0x0
88 ; CHECK-LE-NEXT: vpsel q0, q1, q0
89 ; CHECK-LE-NEXT: vmov q4, q1
90 ; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
91 ; CHECK-LE-NEXT: bl ext_i32
92 ; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
93 ; CHECK-LE-NEXT: vpsel q0, q4, q0
94 ; CHECK-LE-NEXT: add sp, #8
95 ; CHECK-LE-NEXT: vpop {d8, d9}
96 ; CHECK-LE-NEXT: pop {r7, pc}
98 ; CHECK-BE-LABEL: shuffle1_v4i32:
99 ; CHECK-BE: @ %bb.0: @ %entry
100 ; CHECK-BE-NEXT: .save {r7, lr}
101 ; CHECK-BE-NEXT: push {r7, lr}
102 ; CHECK-BE-NEXT: .vsave {d8, d9}
103 ; CHECK-BE-NEXT: vpush {d8, d9}
104 ; CHECK-BE-NEXT: .pad #8
105 ; CHECK-BE-NEXT: sub sp, #8
106 ; CHECK-BE-NEXT: vrev64.32 q4, q1
107 ; CHECK-BE-NEXT: vrev64.32 q1, q0
108 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
109 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
110 ; CHECK-BE-NEXT: vpsel q1, q4, q0
111 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
112 ; CHECK-BE-NEXT: vrev64.32 q0, q1
113 ; CHECK-BE-NEXT: bl ext_i32
114 ; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
115 ; CHECK-BE-NEXT: vrev64.32 q1, q0
116 ; CHECK-BE-NEXT: vpsel q1, q4, q1
117 ; CHECK-BE-NEXT: vrev64.32 q0, q1
118 ; CHECK-BE-NEXT: add sp, #8
119 ; CHECK-BE-NEXT: vpop {d8, d9}
120 ; CHECK-BE-NEXT: pop {r7, pc}
; %c is consumed by both selects, one on each side of the call.
122 %c = icmp eq <4 x i32> %src, zeroinitializer
123 %s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
124 %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
125 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
; <8 x i16> case. The lane mask %c = (%src == 0) is used by a select before the
; call to @ext_i16 and by another select after it, so the mask is live across
; the call. Both runs expect the predicate produced by vcmp.i16 to be stored to
; the stack (vstr p0) before the bl and loaded back (vldr p0) afterwards. The
; big-endian (CHECK-BE) expectations additionally contain vrev64.16 on the
; incoming arguments, on the value passed to and returned from the call, and on
; the final result.
129 define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
130 ; CHECK-LE-LABEL: shuffle1_v8i16:
131 ; CHECK-LE: @ %bb.0: @ %entry
132 ; CHECK-LE-NEXT: .save {r7, lr}
133 ; CHECK-LE-NEXT: push {r7, lr}
134 ; CHECK-LE-NEXT: .vsave {d8, d9}
135 ; CHECK-LE-NEXT: vpush {d8, d9}
136 ; CHECK-LE-NEXT: .pad #8
137 ; CHECK-LE-NEXT: sub sp, #8
138 ; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
139 ; CHECK-LE-NEXT: vmov.i32 q0, #0x0
140 ; CHECK-LE-NEXT: vpsel q0, q1, q0
141 ; CHECK-LE-NEXT: vmov q4, q1
142 ; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
143 ; CHECK-LE-NEXT: bl ext_i16
144 ; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
145 ; CHECK-LE-NEXT: vpsel q0, q4, q0
146 ; CHECK-LE-NEXT: add sp, #8
147 ; CHECK-LE-NEXT: vpop {d8, d9}
148 ; CHECK-LE-NEXT: pop {r7, pc}
150 ; CHECK-BE-LABEL: shuffle1_v8i16:
151 ; CHECK-BE: @ %bb.0: @ %entry
152 ; CHECK-BE-NEXT: .save {r7, lr}
153 ; CHECK-BE-NEXT: push {r7, lr}
154 ; CHECK-BE-NEXT: .vsave {d8, d9}
155 ; CHECK-BE-NEXT: vpush {d8, d9}
156 ; CHECK-BE-NEXT: .pad #8
157 ; CHECK-BE-NEXT: sub sp, #8
158 ; CHECK-BE-NEXT: vrev64.16 q4, q1
159 ; CHECK-BE-NEXT: vrev64.16 q1, q0
160 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
161 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
162 ; CHECK-BE-NEXT: vpsel q1, q4, q0
163 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
164 ; CHECK-BE-NEXT: vrev64.16 q0, q1
165 ; CHECK-BE-NEXT: bl ext_i16
166 ; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
167 ; CHECK-BE-NEXT: vrev64.16 q1, q0
168 ; CHECK-BE-NEXT: vpsel q1, q4, q1
169 ; CHECK-BE-NEXT: vrev64.16 q0, q1
170 ; CHECK-BE-NEXT: add sp, #8
171 ; CHECK-BE-NEXT: vpop {d8, d9}
172 ; CHECK-BE-NEXT: pop {r7, pc}
; %c is consumed by both selects, one on each side of the call.
174 %c = icmp eq <8 x i16> %src, zeroinitializer
175 %s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
176 %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
177 %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
; <16 x i8> case. The lane mask %c = (%src == 0) is used by a select before the
; call to @ext_i8 and by another select after it, so the mask is live across
; the call. Both runs expect the predicate produced by vcmp.i8 to be stored to
; the stack (vstr p0) before the bl and loaded back (vldr p0) afterwards. The
; big-endian (CHECK-BE) expectations additionally contain vrev64.8 on the
; incoming arguments, on the value passed to and returned from the call, and on
; the final result.
181 define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
182 ; CHECK-LE-LABEL: shuffle1_v16i8:
183 ; CHECK-LE: @ %bb.0: @ %entry
184 ; CHECK-LE-NEXT: .save {r7, lr}
185 ; CHECK-LE-NEXT: push {r7, lr}
186 ; CHECK-LE-NEXT: .vsave {d8, d9}
187 ; CHECK-LE-NEXT: vpush {d8, d9}
188 ; CHECK-LE-NEXT: .pad #8
189 ; CHECK-LE-NEXT: sub sp, #8
190 ; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
191 ; CHECK-LE-NEXT: vmov.i32 q0, #0x0
192 ; CHECK-LE-NEXT: vpsel q0, q1, q0
193 ; CHECK-LE-NEXT: vmov q4, q1
194 ; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
195 ; CHECK-LE-NEXT: bl ext_i8
196 ; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
197 ; CHECK-LE-NEXT: vpsel q0, q4, q0
198 ; CHECK-LE-NEXT: add sp, #8
199 ; CHECK-LE-NEXT: vpop {d8, d9}
200 ; CHECK-LE-NEXT: pop {r7, pc}
202 ; CHECK-BE-LABEL: shuffle1_v16i8:
203 ; CHECK-BE: @ %bb.0: @ %entry
204 ; CHECK-BE-NEXT: .save {r7, lr}
205 ; CHECK-BE-NEXT: push {r7, lr}
206 ; CHECK-BE-NEXT: .vsave {d8, d9}
207 ; CHECK-BE-NEXT: vpush {d8, d9}
208 ; CHECK-BE-NEXT: .pad #8
209 ; CHECK-BE-NEXT: sub sp, #8
210 ; CHECK-BE-NEXT: vrev64.8 q4, q1
211 ; CHECK-BE-NEXT: vrev64.8 q1, q0
212 ; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
213 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
214 ; CHECK-BE-NEXT: vpsel q1, q4, q0
215 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
216 ; CHECK-BE-NEXT: vrev64.8 q0, q1
217 ; CHECK-BE-NEXT: bl ext_i8
218 ; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
219 ; CHECK-BE-NEXT: vrev64.8 q1, q0
220 ; CHECK-BE-NEXT: vpsel q1, q4, q1
221 ; CHECK-BE-NEXT: vrev64.8 q0, q1
222 ; CHECK-BE-NEXT: add sp, #8
223 ; CHECK-BE-NEXT: vpop {d8, d9}
224 ; CHECK-BE-NEXT: pop {r7, pc}
; %c is consumed by both selects, one on each side of the call.
226 %c = icmp eq <16 x i8> %src, zeroinitializer
227 %s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
228 %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
229 %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext