1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
; Shifts every i8 lane of %a left by the splat constant 5 using a plain IR shl.
; The assertions shown here expect vshl.i8 q0, q0, #5.
4 define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
5 ; CHECK-LABEL: test_vshlq_n_s8:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vshl.i8 q0, q0, #5
10 %0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
; Shifts every i16 lane of %a left by the splat constant 5 using a plain IR shl.
; The assertions shown here expect vshl.i16 q0, q0, #5.
14 define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
15 ; CHECK-LABEL: test_vshlq_n_s16:
16 ; CHECK: @ %bb.0: @ %entry
17 ; CHECK-NEXT: vshl.i16 q0, q0, #5
20 %0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
; Shifts every i32 lane of %a left by the splat constant 18 using a plain IR shl.
; The assertions shown here expect vshl.i32 q0, q0, #18.
24 define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
25 ; CHECK-LABEL: test_vshlq_n_s32:
26 ; CHECK: @ %bb.0: @ %entry
27 ; CHECK-NEXT: vshl.i32 q0, q0, #18
30 %0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
; Arithmetic right shift (IR ashr) of every i8 lane of %a by the splat constant 4.
; The assertions shown here expect vshr.s8 q0, q0, #4.
34 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
35 ; CHECK-LABEL: test_vshrq_n_s8:
36 ; CHECK: @ %bb.0: @ %entry
37 ; CHECK-NEXT: vshr.s8 q0, q0, #4
40 %0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
; Arithmetic right shift (IR ashr) of every i16 lane of %a by the splat constant 10.
; The assertions shown here expect vshr.s16 q0, q0, #10.
44 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
45 ; CHECK-LABEL: test_vshrq_n_s16:
46 ; CHECK: @ %bb.0: @ %entry
47 ; CHECK-NEXT: vshr.s16 q0, q0, #10
50 %0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
; Arithmetic right shift (IR ashr) of every i32 lane of %a by the splat constant 19.
; The assertions shown here expect vshr.s32 q0, q0, #19.
54 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
55 ; CHECK-LABEL: test_vshrq_n_s32:
56 ; CHECK: @ %bb.0: @ %entry
57 ; CHECK-NEXT: vshr.s32 q0, q0, #19
60 %0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
; Logical right shift (IR lshr) of every i8 lane of %a by the splat constant 1.
; The assertions shown here expect vshr.u8 q0, q0, #1.
64 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
65 ; CHECK-LABEL: test_vshrq_n_u8:
66 ; CHECK: @ %bb.0: @ %entry
67 ; CHECK-NEXT: vshr.u8 q0, q0, #1
70 %0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Logical right shift (IR lshr) of every i16 lane of %a by the splat constant 10.
; The assertions shown here expect vshr.u16 q0, q0, #10.
74 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
75 ; CHECK-LABEL: test_vshrq_n_u16:
76 ; CHECK: @ %bb.0: @ %entry
77 ; CHECK-NEXT: vshr.u16 q0, q0, #10
80 %0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
; Logical right shift (IR lshr) of every i32 lane of %a by the splat constant 10.
; The assertions shown here expect vshr.u32 q0, q0, #10.
84 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
85 ; CHECK-LABEL: test_vshrq_n_u32:
86 ; CHECK: @ %bb.0: @ %entry
87 ; CHECK-NEXT: vshr.u32 q0, q0, #10
90 %0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
; Zero-extends %p to i32, turns it into a <16 x i1> mask via llvm.arm.mve.pred.i2v, and
; calls llvm.arm.mve.shl.imm.predicated on %a with shift 6, that mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshlt.i8 q0, q1, #6.
94 define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
95 ; CHECK-LABEL: test_vshlq_m_n_s8:
96 ; CHECK: @ %bb.0: @ %entry
97 ; CHECK-NEXT: vmsr p0, r0
99 ; CHECK-NEXT: vshlt.i8 q0, q1, #6
102 %0 = zext i16 %p to i32
103 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
104 %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive)
; Zero-extends %p to i32, turns it into an <8 x i1> mask via llvm.arm.mve.pred.i2v, and
; calls llvm.arm.mve.shl.imm.predicated on %a with shift 13, that mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshlt.i16 q0, q1, #13.
108 define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
109 ; CHECK-LABEL: test_vshlq_m_n_s16:
110 ; CHECK: @ %bb.0: @ %entry
111 ; CHECK-NEXT: vmsr p0, r0
113 ; CHECK-NEXT: vshlt.i16 q0, q1, #13
116 %0 = zext i16 %p to i32
117 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
118 %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive)
; Zero-extends %p to i32, turns it into a <4 x i1> mask via llvm.arm.mve.pred.i2v, and
; calls llvm.arm.mve.shl.imm.predicated on %a with shift 0, that mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshlt.i32 q0, q1, #0.
122 define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
123 ; CHECK-LABEL: test_vshlq_m_n_s32:
124 ; CHECK: @ %bb.0: @ %entry
125 ; CHECK-NEXT: vmsr p0, r0
127 ; CHECK-NEXT: vshlt.i32 q0, q1, #0
130 %0 = zext i16 %p to i32
131 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
132 %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 2, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.s8 q0, q1, #2.
136 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
137 ; CHECK-LABEL: test_vshrq_m_n_s8:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vmsr p0, r0
141 ; CHECK-NEXT: vshrt.s8 q0, q1, #2
144 %0 = zext i16 %p to i32
145 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
146 %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 3, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.s16 q0, q1, #3.
150 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
151 ; CHECK-LABEL: test_vshrq_m_n_s16:
152 ; CHECK: @ %bb.0: @ %entry
153 ; CHECK-NEXT: vmsr p0, r0
155 ; CHECK-NEXT: vshrt.s16 q0, q1, #3
158 %0 = zext i16 %p to i32
159 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
160 %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 13, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.s32 q0, q1, #13.
164 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
165 ; CHECK-LABEL: test_vshrq_m_n_s32:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: vmsr p0, r0
169 ; CHECK-NEXT: vshrt.s32 q0, q1, #13
172 %0 = zext i16 %p to i32
173 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
174 %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 4, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.u8 q0, q1, #4.
178 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
179 ; CHECK-LABEL: test_vshrq_m_n_u8:
180 ; CHECK: @ %bb.0: @ %entry
181 ; CHECK-NEXT: vmsr p0, r0
183 ; CHECK-NEXT: vshrt.u8 q0, q1, #4
186 %0 = zext i16 %p to i32
187 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
188 %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 14, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.u16 q0, q1, #14.
192 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
193 ; CHECK-LABEL: test_vshrq_m_n_u16:
194 ; CHECK: @ %bb.0: @ %entry
195 ; CHECK-NEXT: vmsr p0, r0
197 ; CHECK-NEXT: vshrt.u16 q0, q1, #14
200 %0 = zext i16 %p to i32
201 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
202 %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 21, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vshrt.u32 q0, q1, #21.
206 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
207 ; CHECK-LABEL: test_vshrq_m_n_u32:
208 ; CHECK: @ %bb.0: @ %entry
209 ; CHECK-NEXT: vmsr p0, r0
211 ; CHECK-NEXT: vshrt.u32 q0, q1, #21
214 %0 = zext i16 %p to i32
215 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
216 %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 1, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i8 q0, q0, #1.
220 define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
221 ; CHECK-LABEL: test_vshlq_x_n_s8:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vmsr p0, r0
225 ; CHECK-NEXT: vshlt.i8 q0, q0, #1
228 %0 = zext i16 %p to i32
229 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
230 %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 15, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i16 q0, q0, #15.
234 define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
235 ; CHECK-LABEL: test_vshlq_x_n_s16:
236 ; CHECK: @ %bb.0: @ %entry
237 ; CHECK-NEXT: vmsr p0, r0
239 ; CHECK-NEXT: vshlt.i16 q0, q0, #15
242 %0 = zext i16 %p to i32
243 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
244 %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 13, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i32 q0, q0, #13.
248 define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
249 ; CHECK-LABEL: test_vshlq_x_n_s32:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vmsr p0, r0
253 ; CHECK-NEXT: vshlt.i32 q0, q0, #13
256 %0 = zext i16 %p to i32
257 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
258 %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 4, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i8 q0, q0, #4.
262 define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
263 ; CHECK-LABEL: test_vshlq_x_n_u8:
264 ; CHECK: @ %bb.0: @ %entry
265 ; CHECK-NEXT: vmsr p0, r0
267 ; CHECK-NEXT: vshlt.i8 q0, q0, #4
270 %0 = zext i16 %p to i32
271 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
272 %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 10, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i16 q0, q0, #10.
276 define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
277 ; CHECK-LABEL: test_vshlq_x_n_u16:
278 ; CHECK: @ %bb.0: @ %entry
279 ; CHECK-NEXT: vmsr p0, r0
281 ; CHECK-NEXT: vshlt.i16 q0, q0, #10
284 %0 = zext i16 %p to i32
285 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
286 %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shl.imm.predicated
; on %a with shift 30, passing undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshlt.i32 q0, q0, #30.
290 define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
291 ; CHECK-LABEL: test_vshlq_x_n_u32:
292 ; CHECK: @ %bb.0: @ %entry
293 ; CHECK-NEXT: vmsr p0, r0
295 ; CHECK-NEXT: vshlt.i32 q0, q0, #30
298 %0 = zext i16 %p to i32
299 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
300 %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 4, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.s8 q0, q0, #4.
304 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
305 ; CHECK-LABEL: test_vshrq_x_n_s8:
306 ; CHECK: @ %bb.0: @ %entry
307 ; CHECK-NEXT: vmsr p0, r0
309 ; CHECK-NEXT: vshrt.s8 q0, q0, #4
312 %0 = zext i16 %p to i32
313 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
314 %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 10, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.s16 q0, q0, #10.
318 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
319 ; CHECK-LABEL: test_vshrq_x_n_s16:
320 ; CHECK: @ %bb.0: @ %entry
321 ; CHECK-NEXT: vmsr p0, r0
323 ; CHECK-NEXT: vshrt.s16 q0, q0, #10
326 %0 = zext i16 %p to i32
327 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
328 %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 7, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.s32 q0, q0, #7.
332 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
333 ; CHECK-LABEL: test_vshrq_x_n_s32:
334 ; CHECK: @ %bb.0: @ %entry
335 ; CHECK-NEXT: vmsr p0, r0
337 ; CHECK-NEXT: vshrt.s32 q0, q0, #7
340 %0 = zext i16 %p to i32
341 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
342 %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 7, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.u8 q0, q0, #7.
346 define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
347 ; CHECK-LABEL: test_vshrq_x_n_u8:
348 ; CHECK: @ %bb.0: @ %entry
349 ; CHECK-NEXT: vmsr p0, r0
351 ; CHECK-NEXT: vshrt.u8 q0, q0, #7
354 %0 = zext i16 %p to i32
355 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
356 %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 7, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.u16 q0, q0, #7.
360 define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
361 ; CHECK-LABEL: test_vshrq_x_n_u16:
362 ; CHECK: @ %bb.0: @ %entry
363 ; CHECK-NEXT: vmsr p0, r0
365 ; CHECK-NEXT: vshrt.u16 q0, q0, #7
368 %0 = zext i16 %p to i32
369 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
370 %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.shr.imm.predicated
; on %a with shift 6, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vshrt.u32 q0, q0, #6.
374 define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
375 ; CHECK-LABEL: test_vshrq_x_n_u32:
376 ; CHECK: @ %bb.0: @ %entry
377 ; CHECK-NEXT: vmsr p0, r0
379 ; CHECK-NEXT: vshrt.u32 q0, q0, #6
382 %0 = zext i16 %p to i32
383 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
384 %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
; Calls llvm.arm.mve.vqshl.imm on <16 x i8> %a with shift 3 and i32 0 as the third argument.
; The assertions shown here expect vqshl.s8 q0, q0, #3.
388 define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
389 ; CHECK-LABEL: test_vqshlq_n_s8:
390 ; CHECK: @ %bb.0: @ %entry
391 ; CHECK-NEXT: vqshl.s8 q0, q0, #3
394 %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 3, i32 0)
; Calls llvm.arm.mve.vqshl.imm on <8 x i16> %a with shift 4 and i32 0 as the third argument.
; The assertions shown here expect vqshl.s16 q0, q0, #4.
398 define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
399 ; CHECK-LABEL: test_vqshlq_n_s16:
400 ; CHECK: @ %bb.0: @ %entry
401 ; CHECK-NEXT: vqshl.s16 q0, q0, #4
404 %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 4, i32 0)
; Calls llvm.arm.mve.vqshl.imm on <4 x i32> %a with shift 4 and i32 0 as the third argument.
; The assertions shown here expect vqshl.s32 q0, q0, #4.
408 define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
409 ; CHECK-LABEL: test_vqshlq_n_s32:
410 ; CHECK: @ %bb.0: @ %entry
411 ; CHECK-NEXT: vqshl.s32 q0, q0, #4
414 %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 4, i32 0)
; Calls llvm.arm.mve.vqshl.imm on <16 x i8> %a with shift 0 and i32 1 as the third argument.
; The assertions shown here expect vqshl.u8 q0, q0, #0.
418 define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
419 ; CHECK-LABEL: test_vqshlq_n_u8:
420 ; CHECK: @ %bb.0: @ %entry
421 ; CHECK-NEXT: vqshl.u8 q0, q0, #0
424 %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 0, i32 1)
; Calls llvm.arm.mve.vqshl.imm on <8 x i16> %a with shift 13 and i32 1 as the third argument.
; The assertions shown here expect vqshl.u16 q0, q0, #13.
428 define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
429 ; CHECK-LABEL: test_vqshlq_n_u16:
430 ; CHECK: @ %bb.0: @ %entry
431 ; CHECK-NEXT: vqshl.u16 q0, q0, #13
434 %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 13, i32 1)
; Calls llvm.arm.mve.vqshl.imm on <4 x i32> %a with shift 6 and i32 1 as the third argument.
; The assertions shown here expect vqshl.u32 q0, q0, #6.
438 define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
439 ; CHECK-LABEL: test_vqshlq_n_u32:
440 ; CHECK: @ %bb.0: @ %entry
441 ; CHECK-NEXT: vqshl.u32 q0, q0, #6
444 %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 6, i32 1)
; Calls llvm.arm.mve.vqshlu.imm on <16 x i8> %a with shift 5.
; The assertions shown here expect vqshlu.s8 q0, q0, #5.
448 define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
449 ; CHECK-LABEL: test_vqshluq_n_s8:
450 ; CHECK: @ %bb.0: @ %entry
451 ; CHECK-NEXT: vqshlu.s8 q0, q0, #5
454 %0 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> %a, i32 5)
; Calls llvm.arm.mve.vqshlu.imm on <8 x i16> %a with shift 5.
; The assertions shown here expect vqshlu.s16 q0, q0, #5.
458 define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
459 ; CHECK-LABEL: test_vqshluq_n_s16:
460 ; CHECK: @ %bb.0: @ %entry
461 ; CHECK-NEXT: vqshlu.s16 q0, q0, #5
464 %0 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> %a, i32 5)
; Calls llvm.arm.mve.vqshlu.imm on <4 x i32> %a with shift 4.
; The assertions shown here expect vqshlu.s32 q0, q0, #4.
468 define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
469 ; CHECK-LABEL: test_vqshluq_n_s32:
470 ; CHECK: @ %bb.0: @ %entry
471 ; CHECK-NEXT: vqshlu.s32 q0, q0, #4
474 %0 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> %a, i32 4)
; Calls llvm.arm.mve.vrshr.imm on <16 x i8> %a with shift 4 and i32 0 as the third argument.
; The assertions shown here expect vrshr.s8 q0, q0, #4.
478 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
479 ; CHECK-LABEL: test_vrshrq_n_s8:
480 ; CHECK: @ %bb.0: @ %entry
481 ; CHECK-NEXT: vrshr.s8 q0, q0, #4
484 %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 4, i32 0)
; Calls llvm.arm.mve.vrshr.imm on <8 x i16> %a with shift 12 and i32 0 as the third argument.
; The assertions shown here expect vrshr.s16 q0, q0, #12.
488 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
489 ; CHECK-LABEL: test_vrshrq_n_s16:
490 ; CHECK: @ %bb.0: @ %entry
491 ; CHECK-NEXT: vrshr.s16 q0, q0, #12
494 %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 12, i32 0)
; Calls llvm.arm.mve.vrshr.imm on <4 x i32> %a with shift 30 and i32 0 as the third argument.
; The assertions shown here expect vrshr.s32 q0, q0, #30.
498 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
499 ; CHECK-LABEL: test_vrshrq_n_s32:
500 ; CHECK: @ %bb.0: @ %entry
501 ; CHECK-NEXT: vrshr.s32 q0, q0, #30
504 %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 30, i32 0)
; Calls llvm.arm.mve.vrshr.imm on <16 x i8> %a with shift 1 and i32 1 as the third argument.
; The assertions shown here expect vrshr.u8 q0, q0, #1.
508 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
509 ; CHECK-LABEL: test_vrshrq_n_u8:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vrshr.u8 q0, q0, #1
514 %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 1, i32 1)
; Calls llvm.arm.mve.vrshr.imm on <8 x i16> %a with shift 15 and i32 1 as the third argument.
; The assertions shown here expect vrshr.u16 q0, q0, #15.
518 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
519 ; CHECK-LABEL: test_vrshrq_n_u16:
520 ; CHECK: @ %bb.0: @ %entry
521 ; CHECK-NEXT: vrshr.u16 q0, q0, #15
524 %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 15, i32 1)
; Calls llvm.arm.mve.vrshr.imm on <4 x i32> %a with shift 20 and i32 1 as the third argument.
; The assertions shown here expect vrshr.u32 q0, q0, #20.
528 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
529 ; CHECK-LABEL: test_vrshrq_n_u32:
530 ; CHECK: @ %bb.0: @ %entry
531 ; CHECK-NEXT: vrshr.u32 q0, q0, #20
534 %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 20, i32 1)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 6, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.s8 q0, q1, #6.
538 define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
539 ; CHECK-LABEL: test_vqshlq_m_n_s8:
540 ; CHECK: @ %bb.0: @ %entry
541 ; CHECK-NEXT: vmsr p0, r0
543 ; CHECK-NEXT: vqshlt.s8 q0, q1, #6
546 %0 = zext i16 %p to i32
547 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
548 %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 13, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.s16 q0, q1, #13.
552 define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
553 ; CHECK-LABEL: test_vqshlq_m_n_s16:
554 ; CHECK: @ %bb.0: @ %entry
555 ; CHECK-NEXT: vmsr p0, r0
557 ; CHECK-NEXT: vqshlt.s16 q0, q1, #13
560 %0 = zext i16 %p to i32
561 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
562 %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 14, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.s32 q0, q1, #14.
566 define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
567 ; CHECK-LABEL: test_vqshlq_m_n_s32:
568 ; CHECK: @ %bb.0: @ %entry
569 ; CHECK-NEXT: vmsr p0, r0
571 ; CHECK-NEXT: vqshlt.s32 q0, q1, #14
574 %0 = zext i16 %p to i32
575 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
576 %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 14, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 4, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.u8 q0, q1, #4.
580 define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
581 ; CHECK-LABEL: test_vqshlq_m_n_u8:
582 ; CHECK: @ %bb.0: @ %entry
583 ; CHECK-NEXT: vmsr p0, r0
585 ; CHECK-NEXT: vqshlt.u8 q0, q1, #4
588 %0 = zext i16 %p to i32
589 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
590 %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 9, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.u16 q0, q1, #9.
594 define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
595 ; CHECK-LABEL: test_vqshlq_m_n_u16:
596 ; CHECK: @ %bb.0: @ %entry
597 ; CHECK-NEXT: vmsr p0, r0
599 ; CHECK-NEXT: vqshlt.u16 q0, q1, #9
602 %0 = zext i16 %p to i32
603 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
604 %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshl.imm.predicated
; on %a with shift 25, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlt.u32 q0, q1, #25.
608 define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
609 ; CHECK-LABEL: test_vqshlq_m_n_u32:
610 ; CHECK: @ %bb.0: @ %entry
611 ; CHECK-NEXT: vmsr p0, r0
613 ; CHECK-NEXT: vqshlt.u32 q0, q1, #25
616 %0 = zext i16 %p to i32
617 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
618 %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshlu.imm.predicated
; on %a with shift 2, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlut.s8 q0, q1, #2.
622 define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
623 ; CHECK-LABEL: test_vqshluq_m_n_s8:
624 ; CHECK: @ %bb.0: @ %entry
625 ; CHECK-NEXT: vmsr p0, r0
627 ; CHECK-NEXT: vqshlut.s8 q0, q1, #2
630 %0 = zext i16 %p to i32
631 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
632 %2 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshlu.imm.predicated
; on %a with shift 12, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlut.s16 q0, q1, #12.
636 define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
637 ; CHECK-LABEL: test_vqshluq_m_n_s16:
638 ; CHECK: @ %bb.0: @ %entry
639 ; CHECK-NEXT: vmsr p0, r0
641 ; CHECK-NEXT: vqshlut.s16 q0, q1, #12
644 %0 = zext i16 %p to i32
645 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
646 %2 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vqshlu.imm.predicated
; on %a with shift 24, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vqshlut.s32 q0, q1, #24.
650 define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
651 ; CHECK-LABEL: test_vqshluq_m_n_s32:
652 ; CHECK: @ %bb.0: @ %entry
653 ; CHECK-NEXT: vmsr p0, r0
655 ; CHECK-NEXT: vqshlut.s32 q0, q1, #24
658 %0 = zext i16 %p to i32
659 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
660 %2 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 2, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s8 q0, q1, #2.
664 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
665 ; CHECK-LABEL: test_vrshrq_m_n_s8:
666 ; CHECK: @ %bb.0: @ %entry
667 ; CHECK-NEXT: vmsr p0, r0
669 ; CHECK-NEXT: vrshrt.s8 q0, q1, #2
672 %0 = zext i16 %p to i32
673 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
674 %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 11, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s16 q0, q1, #11.
678 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
679 ; CHECK-LABEL: test_vrshrq_m_n_s16:
680 ; CHECK: @ %bb.0: @ %entry
681 ; CHECK-NEXT: vmsr p0, r0
683 ; CHECK-NEXT: vrshrt.s16 q0, q1, #11
686 %0 = zext i16 %p to i32
687 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
688 %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 24, i32 0 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s32 q0, q1, #24.
692 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
693 ; CHECK-LABEL: test_vrshrq_m_n_s32:
694 ; CHECK: @ %bb.0: @ %entry
695 ; CHECK-NEXT: vmsr p0, r0
697 ; CHECK-NEXT: vrshrt.s32 q0, q1, #24
700 %0 = zext i16 %p to i32
701 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
702 %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 7, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u8 q0, q1, #7.
706 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
707 ; CHECK-LABEL: test_vrshrq_m_n_u8:
708 ; CHECK: @ %bb.0: @ %entry
709 ; CHECK-NEXT: vmsr p0, r0
711 ; CHECK-NEXT: vrshrt.u8 q0, q1, #7
714 %0 = zext i16 %p to i32
715 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
716 %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> %inactive)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 4, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u16 q0, q1, #4.
720 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
721 ; CHECK-LABEL: test_vrshrq_m_n_u16:
722 ; CHECK: @ %bb.0: @ %entry
723 ; CHECK-NEXT: vmsr p0, r0
725 ; CHECK-NEXT: vrshrt.u16 q0, q1, #4
728 %0 = zext i16 %p to i32
729 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
730 %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 4, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 27, i32 1 as the third argument, the mask and %inactive.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u32 q0, q1, #27.
734 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
735 ; CHECK-LABEL: test_vrshrq_m_n_u32:
736 ; CHECK: @ %bb.0: @ %entry
737 ; CHECK-NEXT: vmsr p0, r0
739 ; CHECK-NEXT: vrshrt.u32 q0, q1, #27
742 %0 = zext i16 %p to i32
743 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
744 %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 27, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 3, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s8 q0, q0, #3.
748 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
749 ; CHECK-LABEL: test_vrshrq_x_n_s8:
750 ; CHECK: @ %bb.0: @ %entry
751 ; CHECK-NEXT: vmsr p0, r0
753 ; CHECK-NEXT: vrshrt.s8 q0, q0, #3
756 %0 = zext i16 %p to i32
757 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
758 %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 3, i32 0, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 12, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s16 q0, q0, #12.
762 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
763 ; CHECK-LABEL: test_vrshrq_x_n_s16:
764 ; CHECK: @ %bb.0: @ %entry
765 ; CHECK-NEXT: vmsr p0, r0
767 ; CHECK-NEXT: vrshrt.s16 q0, q0, #12
770 %0 = zext i16 %p to i32
771 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
772 %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 20, i32 0 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.s32 q0, q0, #20.
776 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
777 ; CHECK-LABEL: test_vrshrq_x_n_s32:
778 ; CHECK: @ %bb.0: @ %entry
779 ; CHECK-NEXT: vmsr p0, r0
781 ; CHECK-NEXT: vrshrt.s32 q0, q0, #20
784 %0 = zext i16 %p to i32
785 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
786 %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 20, i32 0, <4 x i1> %1, <4 x i32> undef)
; Builds a <16 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 1, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u8 q0, q0, #1.
790 define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
791 ; CHECK-LABEL: test_vrshrq_x_n_u8:
792 ; CHECK: @ %bb.0: @ %entry
793 ; CHECK-NEXT: vmsr p0, r0
795 ; CHECK-NEXT: vrshrt.u8 q0, q0, #1
798 %0 = zext i16 %p to i32
799 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
800 %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 13, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u16 q0, q0, #13.
804 define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
805 ; CHECK-LABEL: test_vrshrq_x_n_u16:
806 ; CHECK: @ %bb.0: @ %entry
807 ; CHECK-NEXT: vmsr p0, r0
809 ; CHECK-NEXT: vrshrt.u16 q0, q0, #13
812 %0 = zext i16 %p to i32
813 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
814 %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 1, <8 x i1> %1, <8 x i16> undef)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vrshr.imm.predicated
; on %a with shift 6, i32 1 as the third argument, and undef as the last argument.
; The assertions shown here expect vmsr p0, r0 and vrshrt.u32 q0, q0, #6.
818 define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
819 ; CHECK-LABEL: test_vrshrq_x_n_u32:
820 ; CHECK: @ %bb.0: @ %entry
821 ; CHECK-NEXT: vmsr p0, r0
823 ; CHECK-NEXT: vrshrt.u32 q0, q0, #6
826 %0 = zext i16 %p to i32
827 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
828 %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
; Calls llvm.arm.mve.vshll.imm on <16 x i8> %a, producing <8 x i16>, with shift 2 and
; trailing arguments (i32 0, i32 0). The assertions shown here expect vshllb.s8 q0, q0, #2.
832 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8(<16 x i8> %a) {
833 ; CHECK-LABEL: test_vshllbq_n_s8:
834 ; CHECK: @ %bb.0: @ %entry
835 ; CHECK-NEXT: vshllb.s8 q0, q0, #2
838 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 2, i32 0, i32 0)
; Same intrinsic call shape as the non-lanewidth s8 case, but the shift amount is 8, equal to
; the bit width of the i8 source lanes. The assertions shown here expect vshllb.s8 q0, q0, #8.
842 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8_lanewidth(<16 x i8> %a) {
843 ; CHECK-LABEL: test_vshllbq_n_s8_lanewidth:
844 ; CHECK: @ %bb.0: @ %entry
845 ; CHECK-NEXT: vshllb.s8 q0, q0, #8
848 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 0)
; Calls llvm.arm.mve.vshll.imm on <8 x i16> %a, producing <4 x i32>, with shift 13 and
; trailing arguments (i32 0, i32 0). The assertions shown here expect vshllb.s16 q0, q0, #13.
852 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16(<8 x i16> %a) {
853 ; CHECK-LABEL: test_vshllbq_n_s16:
854 ; CHECK: @ %bb.0: @ %entry
855 ; CHECK-NEXT: vshllb.s16 q0, q0, #13
858 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 13, i32 0, i32 0)
; Same intrinsic call shape as the non-lanewidth s16 case, but the shift amount is 16, equal to
; the bit width of the i16 source lanes. The assertions shown here expect vshllb.s16 q0, q0, #16.
862 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16_lanewidth(<8 x i16> %a) {
863 ; CHECK-LABEL: test_vshllbq_n_s16_lanewidth:
864 ; CHECK: @ %bb.0: @ %entry
865 ; CHECK-NEXT: vshllb.s16 q0, q0, #16
868 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 0)
; Calls llvm.arm.mve.vshll.imm on <16 x i8> %a, producing <8 x i16>, with shift 5 and
; trailing arguments (i32 1, i32 0). The assertions shown here expect vshllb.u8 q0, q0, #5.
872 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8(<16 x i8> %a) {
873 ; CHECK-LABEL: test_vshllbq_n_u8:
874 ; CHECK: @ %bb.0: @ %entry
875 ; CHECK-NEXT: vshllb.u8 q0, q0, #5
878 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 5, i32 1, i32 0)
; Same intrinsic call shape as the non-lanewidth u8 case, but the shift amount is 8, equal to
; the bit width of the i8 source lanes. The assertions shown here expect vshllb.u8 q0, q0, #8.
882 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8_lanewidth(<16 x i8> %a) {
883 ; CHECK-LABEL: test_vshllbq_n_u8_lanewidth:
884 ; CHECK: @ %bb.0: @ %entry
885 ; CHECK-NEXT: vshllb.u8 q0, q0, #8
888 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 0)
; Calls llvm.arm.mve.vshll.imm on <8 x i16> %a, producing <4 x i32>, with shift 6 and
; trailing arguments (i32 1, i32 0). The assertions shown here expect vshllb.u16 q0, q0, #6.
892 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16(<8 x i16> %a) {
893 ; CHECK-LABEL: test_vshllbq_n_u16:
894 ; CHECK: @ %bb.0: @ %entry
895 ; CHECK-NEXT: vshllb.u16 q0, q0, #6
898 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 6, i32 1, i32 0)
; Same intrinsic call shape as the non-lanewidth u16 case, but the shift amount is 16, equal to
; the bit width of the i16 source lanes. The assertions shown here expect vshllb.u16 q0, q0, #16.
902 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16_lanewidth(<8 x i16> %a) {
903 ; CHECK-LABEL: test_vshllbq_n_u16_lanewidth:
904 ; CHECK: @ %bb.0: @ %entry
905 ; CHECK-NEXT: vshllb.u16 q0, q0, #16
908 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 0)
; Calls llvm.arm.mve.vshll.imm on <16 x i8> %a, producing <8 x i16>, with shift 7 and
; trailing arguments (i32 0, i32 1). The assertions shown here expect vshllt.s8 q0, q0, #7.
912 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8(<16 x i8> %a) {
913 ; CHECK-LABEL: test_vshlltq_n_s8:
914 ; CHECK: @ %bb.0: @ %entry
915 ; CHECK-NEXT: vshllt.s8 q0, q0, #7
918 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 0, i32 1)
; Same intrinsic call shape as the non-lanewidth vshllt s8 case, but the shift amount is 8, equal
; to the bit width of the i8 source lanes. The assertions shown here expect vshllt.s8 q0, q0, #8.
922 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8_lanewidth(<16 x i8> %a) {
923 ; CHECK-LABEL: test_vshlltq_n_s8_lanewidth:
924 ; CHECK: @ %bb.0: @ %entry
925 ; CHECK-NEXT: vshllt.s8 q0, q0, #8
928 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 1)
; Calls llvm.arm.mve.vshll.imm on <8 x i16> %a, producing <4 x i32>, with shift 2 and
; trailing arguments (i32 0, i32 1). The assertions shown here expect vshllt.s16 q0, q0, #2.
932 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16(<8 x i16> %a) {
933 ; CHECK-LABEL: test_vshlltq_n_s16:
934 ; CHECK: @ %bb.0: @ %entry
935 ; CHECK-NEXT: vshllt.s16 q0, q0, #2
938 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 2, i32 0, i32 1)
; Same intrinsic call shape as the non-lanewidth vshllt s16 case, but the shift amount is 16, equal
; to the bit width of the i16 source lanes. The assertions shown here expect vshllt.s16 q0, q0, #16.
942 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16_lanewidth(<8 x i16> %a) {
943 ; CHECK-LABEL: test_vshlltq_n_s16_lanewidth:
944 ; CHECK: @ %bb.0: @ %entry
945 ; CHECK-NEXT: vshllt.s16 q0, q0, #16
948 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 1)
; Calls llvm.arm.mve.vshll.imm on <16 x i8> %a, producing <8 x i16>, with shift 7 and
; trailing arguments (i32 1, i32 1). The assertions shown here expect vshllt.u8 q0, q0, #7.
952 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8(<16 x i8> %a) {
953 ; CHECK-LABEL: test_vshlltq_n_u8:
954 ; CHECK: @ %bb.0: @ %entry
955 ; CHECK-NEXT: vshllt.u8 q0, q0, #7
958 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 1, i32 1)
; Same intrinsic call shape as the non-lanewidth vshllt u8 case, but the shift amount is 8, equal
; to the bit width of the i8 source lanes. The assertions shown here expect vshllt.u8 q0, q0, #8.
962 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8_lanewidth(<16 x i8> %a) {
963 ; CHECK-LABEL: test_vshlltq_n_u8_lanewidth:
964 ; CHECK: @ %bb.0: @ %entry
965 ; CHECK-NEXT: vshllt.u8 q0, q0, #8
968 %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 1)
; Calls llvm.arm.mve.vshll.imm on <8 x i16> %a, producing <4 x i32>, with shift 14 and
; trailing arguments (i32 1, i32 1). The assertions shown here expect vshllt.u16 q0, q0, #14.
972 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16(<8 x i16> %a) {
973 ; CHECK-LABEL: test_vshlltq_n_u16:
974 ; CHECK: @ %bb.0: @ %entry
975 ; CHECK-NEXT: vshllt.u16 q0, q0, #14
978 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 14, i32 1, i32 1)
; Same intrinsic call shape as the non-lanewidth vshllt u16 case, but the shift amount is 16, equal
; to the bit width of the i16 source lanes. The assertions shown here expect vshllt.u16 q0, q0, #16.
982 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16_lanewidth(<8 x i16> %a) {
983 ; CHECK-LABEL: test_vshlltq_n_u16_lanewidth:
984 ; CHECK: @ %bb.0: @ %entry
985 ; CHECK-NEXT: vshllt.u16 q0, q0, #16
988 %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 1)
; Builds an <8 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vshll.imm.predicated
; on <16 x i8> %a with shift 6, arguments (i32 0, i32 0), the mask and the <8 x i16> %inactive.
; The assertions shown here expect vmsr p0, r0 and vshllbt.s8 q0, q1, #6.
992 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
993 ; CHECK-LABEL: test_vshllbq_m_n_s8:
994 ; CHECK: @ %bb.0: @ %entry
995 ; CHECK-NEXT: vmsr p0, r0
997 ; CHECK-NEXT: vshllbt.s8 q0, q1, #6
1000 %0 = zext i16 %p to i32
1001 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1002 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm call on <16 x i8> %a with arguments (i32 0, i32 0), where the shift
; amount is 8, equal to the bit width of the i8 source lanes.
; The assertions shown here expect vmsr p0, r0 and vshllbt.s8 q0, q1, #8.
1006 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1007 ; CHECK-LABEL: test_vshllbq_m_n_s8_lanewidth:
1008 ; CHECK: @ %bb.0: @ %entry
1009 ; CHECK-NEXT: vmsr p0, r0
1011 ; CHECK-NEXT: vshllbt.s8 q0, q1, #8
1014 %0 = zext i16 %p to i32
1015 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1016 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Builds a <4 x i1> mask from the zero-extended %p and calls llvm.arm.mve.vshll.imm.predicated
; on <8 x i16> %a with shift 10, arguments (i32 0, i32 0), the mask and the <4 x i32> %inactive.
; The assertions shown here expect vmsr p0, r0 and vshllbt.s16 q0, q1, #10.
1020 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1021 ; CHECK-LABEL: test_vshllbq_m_n_s16:
1022 ; CHECK: @ %bb.0: @ %entry
1023 ; CHECK-NEXT: vmsr p0, r0
1025 ; CHECK-NEXT: vshllbt.s16 q0, q1, #10
1028 %0 = zext i16 %p to i32
1029 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1030 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (0, 0), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.s16 q0, q1, #16`.
1034 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1035 ; CHECK-LABEL: test_vshllbq_m_n_s16_lanewidth:
1036 ; CHECK: @ %bb.0: @ %entry
1037 ; CHECK-NEXT: vmsr p0, r0
1039 ; CHECK-NEXT: vshllbt.s16 q0, q1, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1042 %0 = zext i16 %p to i32
1043 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1044 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 3, flag operands (1, 0),
; a mask derived from %p, and %inactive as the final operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshllbt.u8 q0, q1, #3`.
1048 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1049 ; CHECK-LABEL: test_vshllbq_m_n_u8:
1050 ; CHECK: @ %bb.0: @ %entry
1051 ; CHECK-NEXT: vmsr p0, r0
1053 ; CHECK-NEXT: vshllbt.u8 q0, q1, #3
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1056 %0 = zext i16 %p to i32
1057 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1058 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 3, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (1, 0), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.u8 q0, q1, #8`.
1062 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1063 ; CHECK-LABEL: test_vshllbq_m_n_u8_lanewidth:
1064 ; CHECK: @ %bb.0: @ %entry
1065 ; CHECK-NEXT: vmsr p0, r0
1067 ; CHECK-NEXT: vshllbt.u8 q0, q1, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1070 %0 = zext i16 %p to i32
1071 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1072 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 14, flag operands
; (1, 0), a mask derived from %p, and %inactive as the final operand. The
; expected assembly shown includes `vmsr p0, r0` and `vshllbt.u16 q0, q1, #14`.
1076 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1077 ; CHECK-LABEL: test_vshllbq_m_n_u16:
1078 ; CHECK: @ %bb.0: @ %entry
1079 ; CHECK-NEXT: vmsr p0, r0
1081 ; CHECK-NEXT: vshllbt.u16 q0, q1, #14
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1084 %0 = zext i16 %p to i32
1085 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1086 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 14, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (1, 0), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.u16 q0, q1, #16`.
1090 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1091 ; CHECK-LABEL: test_vshllbq_m_n_u16_lanewidth:
1092 ; CHECK: @ %bb.0: @ %entry
1093 ; CHECK-NEXT: vmsr p0, r0
1095 ; CHECK-NEXT: vshllbt.u16 q0, q1, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1098 %0 = zext i16 %p to i32
1099 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1100 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 4, flag operands (0, 1),
; a mask derived from %p, and %inactive as the final operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.s8 q0, q1, #4`.
1104 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1105 ; CHECK-LABEL: test_vshlltq_m_n_s8:
1106 ; CHECK: @ %bb.0: @ %entry
1107 ; CHECK-NEXT: vmsr p0, r0
1109 ; CHECK-NEXT: vshlltt.s8 q0, q1, #4
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1112 %0 = zext i16 %p to i32
1113 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1114 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 4, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (0, 1), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.s8 q0, q1, #8`.
1118 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1119 ; CHECK-LABEL: test_vshlltq_m_n_s8_lanewidth:
1120 ; CHECK: @ %bb.0: @ %entry
1121 ; CHECK-NEXT: vmsr p0, r0
1123 ; CHECK-NEXT: vshlltt.s8 q0, q1, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1126 %0 = zext i16 %p to i32
1127 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1128 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 12, flag operands
; (0, 1), a mask derived from %p, and %inactive as the final operand. The
; expected assembly shown includes `vmsr p0, r0` and `vshlltt.s16 q0, q1, #12`.
1132 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1133 ; CHECK-LABEL: test_vshlltq_m_n_s16:
1134 ; CHECK: @ %bb.0: @ %entry
1135 ; CHECK-NEXT: vmsr p0, r0
1137 ; CHECK-NEXT: vshlltt.s16 q0, q1, #12
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1140 %0 = zext i16 %p to i32
1141 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1142 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 12, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (0, 1), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.s16 q0, q1, #16`.
1146 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1147 ; CHECK-LABEL: test_vshlltq_m_n_s16_lanewidth:
1148 ; CHECK: @ %bb.0: @ %entry
1149 ; CHECK-NEXT: vmsr p0, r0
1151 ; CHECK-NEXT: vshlltt.s16 q0, q1, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1154 %0 = zext i16 %p to i32
1155 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1156 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 2, flag operands (1, 1),
; a mask derived from %p, and %inactive as the final operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.u8 q0, q1, #2`.
1160 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1161 ; CHECK-LABEL: test_vshlltq_m_n_u8:
1162 ; CHECK: @ %bb.0: @ %entry
1163 ; CHECK-NEXT: vmsr p0, r0
1165 ; CHECK-NEXT: vshlltt.u8 q0, q1, #2
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1168 %0 = zext i16 %p to i32
1169 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1170 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (1, 1), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.u8 q0, q1, #8`.
1174 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
1175 ; CHECK-LABEL: test_vshlltq_m_n_u8_lanewidth:
1176 ; CHECK: @ %bb.0: @ %entry
1177 ; CHECK-NEXT: vmsr p0, r0
1179 ; CHECK-NEXT: vshlltt.u8 q0, q1, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1182 %0 = zext i16 %p to i32
1183 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1184 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 9, flag operands (1, 1),
; a mask derived from %p, and %inactive as the final operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.u16 q0, q1, #9`.
1188 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1189 ; CHECK-LABEL: test_vshlltq_m_n_u16:
1190 ; CHECK: @ %bb.0: @ %entry
1191 ; CHECK-NEXT: vmsr p0, r0
1193 ; CHECK-NEXT: vshlltt.u16 q0, q1, #9
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1196 %0 = zext i16 %p to i32
1197 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1198 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 9, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (1, 1), a mask derived from %p, and
; %inactive as the final operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.u16 q0, q1, #16`.
1202 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
1203 ; CHECK-LABEL: test_vshlltq_m_n_u16_lanewidth:
1204 ; CHECK: @ %bb.0: @ %entry
1205 ; CHECK-NEXT: vmsr p0, r0
1207 ; CHECK-NEXT: vshlltt.u16 q0, q1, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1210 %0 = zext i16 %p to i32
1211 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, inactive vector.
1212 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 1, flag operands (0, 0),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshllbt.s8 q0, q0, #1`.
1216 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
1217 ; CHECK-LABEL: test_vshllbq_x_n_s8:
1218 ; CHECK: @ %bb.0: @ %entry
1219 ; CHECK-NEXT: vmsr p0, r0
1221 ; CHECK-NEXT: vshllbt.s8 q0, q0, #1
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1224 %0 = zext i16 %p to i32
1225 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1226 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (0, 0), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.s8 q0, q0, #8`.
1230 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
1231 ; CHECK-LABEL: test_vshllbq_x_n_s8_lanewidth:
1232 ; CHECK: @ %bb.0: @ %entry
1233 ; CHECK-NEXT: vmsr p0, r0
1235 ; CHECK-NEXT: vshllbt.s8 q0, q0, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1238 %0 = zext i16 %p to i32
1239 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1240 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 10, flag operands
; (0, 0), a mask derived from %p, and `undef` as the final vector operand. The
; expected assembly shown includes `vmsr p0, r0` and `vshllbt.s16 q0, q0, #10`.
1244 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
1245 ; CHECK-LABEL: test_vshllbq_x_n_s16:
1246 ; CHECK: @ %bb.0: @ %entry
1247 ; CHECK-NEXT: vmsr p0, r0
1249 ; CHECK-NEXT: vshllbt.s16 q0, q0, #10
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1252 %0 = zext i16 %p to i32
1253 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1254 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (0, 0), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.s16 q0, q0, #16`.
1258 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
1259 ; CHECK-LABEL: test_vshllbq_x_n_s16_lanewidth:
1260 ; CHECK: @ %bb.0: @ %entry
1261 ; CHECK-NEXT: vmsr p0, r0
1263 ; CHECK-NEXT: vshllbt.s16 q0, q0, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1266 %0 = zext i16 %p to i32
1267 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1268 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 6, flag operands (1, 0),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshllbt.u8 q0, q0, #6`.
1272 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
1273 ; CHECK-LABEL: test_vshllbq_x_n_u8:
1274 ; CHECK: @ %bb.0: @ %entry
1275 ; CHECK-NEXT: vmsr p0, r0
1277 ; CHECK-NEXT: vshllbt.u8 q0, q0, #6
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1280 %0 = zext i16 %p to i32
1281 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1282 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (1, 0), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.u8 q0, q0, #8`.
1286 define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
1287 ; CHECK-LABEL: test_vshllbq_x_n_u8_lanewidth:
1288 ; CHECK: @ %bb.0: @ %entry
1289 ; CHECK-NEXT: vmsr p0, r0
1291 ; CHECK-NEXT: vshllbt.u8 q0, q0, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1294 %0 = zext i16 %p to i32
1295 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1296 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 10, flag operands
; (1, 0), a mask derived from %p, and `undef` as the final vector operand. The
; expected assembly shown includes `vmsr p0, r0` and `vshllbt.u16 q0, q0, #10`.
1300 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
1301 ; CHECK-LABEL: test_vshllbq_x_n_u16:
1302 ; CHECK: @ %bb.0: @ %entry
1303 ; CHECK-NEXT: vmsr p0, r0
1305 ; CHECK-NEXT: vshllbt.u16 q0, q0, #10
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1308 %0 = zext i16 %p to i32
1309 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1310 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (1, 0), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshllbt.u16 q0, q0, #16`.
1314 define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
1315 ; CHECK-LABEL: test_vshllbq_x_n_u16_lanewidth:
1316 ; CHECK: @ %bb.0: @ %entry
1317 ; CHECK-NEXT: vmsr p0, r0
1319 ; CHECK-NEXT: vshllbt.u16 q0, q0, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1322 %0 = zext i16 %p to i32
1323 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1324 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 2, flag operands (0, 1),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.s8 q0, q0, #2`.
1328 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
1329 ; CHECK-LABEL: test_vshlltq_x_n_s8:
1330 ; CHECK: @ %bb.0: @ %entry
1331 ; CHECK-NEXT: vmsr p0, r0
1333 ; CHECK-NEXT: vshlltt.s8 q0, q0, #2
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1336 %0 = zext i16 %p to i32
1337 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1338 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (0, 1), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.s8 q0, q0, #8`.
1342 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
1343 ; CHECK-LABEL: test_vshlltq_x_n_s8_lanewidth:
1344 ; CHECK: @ %bb.0: @ %entry
1345 ; CHECK-NEXT: vmsr p0, r0
1347 ; CHECK-NEXT: vshlltt.s8 q0, q0, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1350 %0 = zext i16 %p to i32
1351 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1352 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 6, flag operands (0, 1),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.s16 q0, q0, #6`.
1356 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
1357 ; CHECK-LABEL: test_vshlltq_x_n_s16:
1358 ; CHECK: @ %bb.0: @ %entry
1359 ; CHECK-NEXT: vmsr p0, r0
1361 ; CHECK-NEXT: vshlltt.s16 q0, q0, #6
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1364 %0 = zext i16 %p to i32
1365 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1366 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 6, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (0, 1), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.s16 q0, q0, #16`.
1370 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
1371 ; CHECK-LABEL: test_vshlltq_x_n_s16_lanewidth:
1372 ; CHECK: @ %bb.0: @ %entry
1373 ; CHECK-NEXT: vmsr p0, r0
1375 ; CHECK-NEXT: vshlltt.s16 q0, q0, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1378 %0 = zext i16 %p to i32
1379 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1380 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 5, flag operands (1, 1),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.u8 q0, q0, #5`.
1384 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
1385 ; CHECK-LABEL: test_vshlltq_x_n_u8:
1386 ; CHECK: @ %bb.0: @ %entry
1387 ; CHECK-NEXT: vmsr p0, r0
1389 ; CHECK-NEXT: vshlltt.u8 q0, q0, #5
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1392 %0 = zext i16 %p to i32
1393 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1394 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 5, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <16 x i8> -> <8 x i16>: shift 8 (equal to the i8
; source element width), flag operands (1, 1), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.u8 q0, q0, #8`.
1398 define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
1399 ; CHECK-LABEL: test_vshlltq_x_n_u8_lanewidth:
1400 ; CHECK: @ %bb.0: @ %entry
1401 ; CHECK-NEXT: vmsr p0, r0
1403 ; CHECK-NEXT: vshlltt.u8 q0, q0, #8
; Zero-extend the i16 predicate to i32 and convert it to an <8 x i1> mask.
1406 %0 = zext i16 %p to i32
1407 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1408 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 3, flag operands (1, 1),
; a mask derived from %p, and `undef` as the final vector operand. The expected
; assembly shown includes `vmsr p0, r0` and `vshlltt.u16 q0, q0, #3`.
1412 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
1413 ; CHECK-LABEL: test_vshlltq_x_n_u16:
1414 ; CHECK: @ %bb.0: @ %entry
1415 ; CHECK-NEXT: vmsr p0, r0
1417 ; CHECK-NEXT: vshlltt.u16 q0, q0, #3
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1420 %0 = zext i16 %p to i32
1421 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1422 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 3, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
; Predicated vshll.imm on <8 x i16> -> <4 x i32>: shift 16 (equal to the i16
; source element width), flag operands (1, 1), a mask derived from %p, and
; `undef` as the final vector operand. The expected assembly shown includes
; `vmsr p0, r0` and `vshlltt.u16 q0, q0, #16`.
1426 define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
1427 ; CHECK-LABEL: test_vshlltq_x_n_u16_lanewidth:
1428 ; CHECK: @ %bb.0: @ %entry
1429 ; CHECK-NEXT: vmsr p0, r0
1431 ; CHECK-NEXT: vshlltt.u16 q0, q0, #16
; Zero-extend the i16 predicate to i32 and convert it to a <4 x i1> mask.
1434 %0 = zext i16 %p to i32
1435 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Operands: source, shift immediate, two i32 flags, mask, undef vector.
1436 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
; Declarations of the Arm MVE intrinsics referenced by the tests in this file.
; Only pred.i2v (v8i1, v4i1) and the vshll.imm families are called in the
; nearby tests; the others are presumably used earlier in the file.

; pred.i2v: i32 -> predicate mask with 16, 8 or 4 i1 lanes.
1440 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
1441 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
1442 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
; shl.imm.predicated: (vector, i32, mask, vector) -> vector of the same type.
1444 declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
1445 declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
1446 declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
; shr.imm.predicated: (vector, i32, i32, mask, vector) -> vector of the same type.
1448 declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
1449 declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
1450 declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
; vqshl.imm: unpredicated (vector, i32, i32) and predicated forms that add
; (mask, vector) operands.
1452 declare <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8>, i32, i32)
1453 declare <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16>, i32, i32)
1454 declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32)
1455 declare <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
1456 declare <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
1457 declare <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
; vqshlu.imm: unpredicated (vector, i32) and predicated forms that add
; (mask, vector) operands.
1459 declare <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8>, i32)
1460 declare <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16>, i32)
1461 declare <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32>, i32)
1462 declare <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
1463 declare <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
1464 declare <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
; vrshr.imm: unpredicated (vector, i32, i32) and predicated forms that add
; (mask, vector) operands.
1466 declare <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8>, i32, i32)
1467 declare <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16>, i32, i32)
1468 declare <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32>, i32, i32)
1469 declare <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
1470 declare <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
1471 declare <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
; vshll.imm: the result has half as many lanes of twice the element width.
; In the nearby tests the three i32 operands line up with the expected assembly
; as: the `#imm` shift amount, 0/1 for the .s/.u type suffix, and 0/1 for the
; vshllb/vshllt mnemonic. The predicated forms add (mask, vector) operands.
1473 declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32)
1474 declare <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32)
1475 declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, i32, <8 x i1>, <8 x i16>)
1476 declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, i32, <4 x i1>, <4 x i32>)