; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=LE %s
; RUN: llc -mtriple=thumbebv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=BE %s
; vmovnb: narrow each i16 lane of %b and write it into the even (bottom) i8 lanes of %a.
define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_s16(<16 x i8> %a, <8 x i16> %b) {
; LE-LABEL: test_vmovnbq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnb.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovnb.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %1 = tail call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %0)
  %2 = shufflevector <8 x i16> %b, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %3 = trunc <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}
; vmovnb: narrow each i32 lane of %b and write it into the even (bottom) i16 lanes of %a.
define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_s32(<8 x i16> %a, <4 x i32> %b) {
; LE-LABEL: test_vmovnbq_s32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnb.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_s32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovnb.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %1 = tail call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %0)
  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %3 = trunc <8 x i32> %2 to <8 x i16>
  ret <8 x i16> %3
}
; Unsigned variant — identical codegen to the s16 form (vmovnb is sign-agnostic).
define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_u16(<16 x i8> %a, <8 x i16> %b) {
; LE-LABEL: test_vmovnbq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnb.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovnb.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %1 = tail call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %0)
  %2 = shufflevector <8 x i16> %b, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %3 = trunc <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}
; Unsigned variant — identical codegen to the s32 form (vmovnb is sign-agnostic).
define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_u32(<8 x i16> %a, <4 x i32> %b) {
; LE-LABEL: test_vmovnbq_u32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnb.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_u32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovnb.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %1 = tail call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %0)
  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %3 = trunc <8 x i32> %2 to <8 x i16>
  ret <8 x i16> %3
}
; vmovnt: narrow each i16 lane of %b and write it into the odd (top) i8 lanes of %a.
define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_s16(<16 x i8> %a, <8 x i16> %b) {
; LE-LABEL: test_vmovntq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovnt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %1 = shufflevector <8 x i16> %0, <8 x i16> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}
; vmovnt: narrow each i32 lane of %b and write it into the odd (top) i16 lanes of %a.
define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_s32(<8 x i16> %a, <4 x i32> %b) {
; LE-LABEL: test_vmovntq_s32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_s32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovnt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %1 = shufflevector <4 x i32> %0, <4 x i32> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Unsigned variant — identical codegen to the s16 form (vmovnt is sign-agnostic).
define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_u16(<16 x i8> %a, <8 x i16> %b) {
; LE-LABEL: test_vmovntq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovnt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %1 = shufflevector <8 x i16> %0, <8 x i16> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}
; Unsigned variant — identical codegen to the s32 form (vmovnt is sign-agnostic).
define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_u32(<8 x i16> %a, <4 x i32> %b) {
; LE-LABEL: test_vmovntq_u32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovnt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_u32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovnt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %1 = shufflevector <4 x i32> %0, <4 x i32> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Predicated vmovnb (tail-predicated via %p): top/bottom flag 0 selects the bottom lanes.
define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovnbq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovnbt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovnbt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
; Predicated vmovnb, i32 -> i16: top/bottom flag 0 selects the bottom lanes.
define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovnbq_m_s32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovnbt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_m_s32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovnbt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
; Unsigned predicated variant — identical codegen to the m_s16 form.
define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovnbq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovnbt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovnbt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
; Unsigned predicated variant — identical codegen to the m_s32 form.
define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovnbq_m_u32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovnbt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovnbq_m_u32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovnbt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
; Predicated vmovnt: top/bottom flag 1 selects the top lanes.
define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovntq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovntt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovntt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
; Predicated vmovnt, i32 -> i16: top/bottom flag 1 selects the top lanes.
define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovntq_m_s32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovntt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_m_s32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovntt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
; Unsigned predicated variant — identical codegen to the m_s16 form.
define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovntq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovntt.i16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q1
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovntt.i16 q1, q2
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
; Unsigned predicated variant — identical codegen to the m_s32 form.
define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; LE-LABEL: test_vmovntq_m_u32:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovntt.i32 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovntq_m_u32:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q1
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovntt.i32 q1, q2
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
; Intrinsic declarations referenced by the tests above.
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>)
declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>)