1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (0, 0, 0).
; The assertions visible here expect vqmovnb.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
4 define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_s16(<16 x i8> %a, <8 x i16> %b) {
5 ; CHECK-LABEL: test_vqmovnbq_s16:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vqmovnb.s16 q0, q1
10 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (0, 0, 0).
; The assertions visible here expect vqmovnb.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
14 define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_s32(<8 x i16> %a, <4 x i32> %b) {
15 ; CHECK-LABEL: test_vqmovnbq_s32:
16 ; CHECK: @ %bb.0: @ %entry
17 ; CHECK-NEXT: vqmovnb.s32 q0, q1
20 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (1, 1, 0).
; The assertions visible here expect vqmovnb.u16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
24 define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_u16(<16 x i8> %a, <8 x i16> %b) {
25 ; CHECK-LABEL: test_vqmovnbq_u16:
26 ; CHECK: @ %bb.0: @ %entry
27 ; CHECK-NEXT: vqmovnb.u16 q0, q1
30 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (1, 1, 0).
; The assertions visible here expect vqmovnb.u32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
34 define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_u32(<8 x i16> %a, <4 x i32> %b) {
35 ; CHECK-LABEL: test_vqmovnbq_u32:
36 ; CHECK: @ %bb.0: @ %entry
37 ; CHECK-NEXT: vqmovnb.u32 q0, q1
40 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (0, 0, 1).
; The assertions visible here expect vqmovnt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
44 define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_s16(<16 x i8> %a, <8 x i16> %b) {
45 ; CHECK-LABEL: test_vqmovntq_s16:
46 ; CHECK: @ %bb.0: @ %entry
47 ; CHECK-NEXT: vqmovnt.s16 q0, q1
50 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (0, 0, 1).
; The assertions visible here expect vqmovnt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
54 define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_s32(<8 x i16> %a, <4 x i32> %b) {
55 ; CHECK-LABEL: test_vqmovntq_s32:
56 ; CHECK: @ %bb.0: @ %entry
57 ; CHECK-NEXT: vqmovnt.s32 q0, q1
60 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (1, 1, 1).
; The assertions visible here expect vqmovnt.u16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
64 define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_u16(<16 x i8> %a, <8 x i16> %b) {
65 ; CHECK-LABEL: test_vqmovntq_u16:
66 ; CHECK: @ %bb.0: @ %entry
67 ; CHECK-NEXT: vqmovnt.u16 q0, q1
70 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (1, 1, 1).
; The assertions visible here expect vqmovnt.u32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
74 define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_u32(<8 x i16> %a, <4 x i32> %b) {
75 ; CHECK-LABEL: test_vqmovntq_u32:
76 ; CHECK: @ %bb.0: @ %entry
77 ; CHECK-NEXT: vqmovnt.u32 q0, q1
80 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (1, 0, 0).
; The assertions visible here expect vqmovunb.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
84 define arm_aapcs_vfpcc <16 x i8> @test_vqmovunbq_s16(<16 x i8> %a, <8 x i16> %b) {
85 ; CHECK-LABEL: test_vqmovunbq_s16:
86 ; CHECK: @ %bb.0: @ %entry
87 ; CHECK-NEXT: vqmovunb.s16 q0, q1
90 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (1, 0, 0).
; The assertions visible here expect vqmovunb.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
94 define arm_aapcs_vfpcc <8 x i16> @test_vqmovunbq_s32(<8 x i16> %a, <4 x i32> %b) {
95 ; CHECK-LABEL: test_vqmovunbq_s32:
96 ; CHECK: @ %bb.0: @ %entry
97 ; CHECK-NEXT: vqmovunb.s32 q0, q1
100 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
; Unpredicated call to @llvm.arm.mve.vqmovn.v16i8.v8i16 with i32 flags (1, 0, 1).
; The assertions visible here expect vqmovunt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
104 define arm_aapcs_vfpcc <16 x i8> @test_vqmovuntq_s16(<16 x i8> %a, <8 x i16> %b) {
105 ; CHECK-LABEL: test_vqmovuntq_s16:
106 ; CHECK: @ %bb.0: @ %entry
107 ; CHECK-NEXT: vqmovunt.s16 q0, q1
110 %0 = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
; Unpredicated call to @llvm.arm.mve.vqmovn.v8i16.v4i32 with i32 flags (1, 0, 1).
; The assertions visible here expect vqmovunt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
114 define arm_aapcs_vfpcc <8 x i16> @test_vqmovuntq_s32(<8 x i16> %a, <4 x i32> %b) {
115 ; CHECK-LABEL: test_vqmovuntq_s32:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vqmovunt.s32 q0, q1
120 %0 = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (0, 0, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovnbt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
124 define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
125 ; CHECK-LABEL: test_vqmovnbq_m_s16:
126 ; CHECK: @ %bb.0: @ %entry
127 ; CHECK-NEXT: vmsr p0, r0
129 ; CHECK-NEXT: vqmovnbt.s16 q0, q1
132 %0 = zext i16 %p to i32
133 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
134 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (0, 0, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovnbt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
138 define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
139 ; CHECK-LABEL: test_vqmovnbq_m_s32:
140 ; CHECK: @ %bb.0: @ %entry
141 ; CHECK-NEXT: vmsr p0, r0
143 ; CHECK-NEXT: vqmovnbt.s32 q0, q1
146 %0 = zext i16 %p to i32
147 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
148 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (1, 1, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovnbt.u16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
152 define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
153 ; CHECK-LABEL: test_vqmovnbq_m_u16:
154 ; CHECK: @ %bb.0: @ %entry
155 ; CHECK-NEXT: vmsr p0, r0
157 ; CHECK-NEXT: vqmovnbt.u16 q0, q1
160 %0 = zext i16 %p to i32
161 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
162 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (1, 1, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovnbt.u32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
166 define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
167 ; CHECK-LABEL: test_vqmovnbq_m_u32:
168 ; CHECK: @ %bb.0: @ %entry
169 ; CHECK-NEXT: vmsr p0, r0
171 ; CHECK-NEXT: vqmovnbt.u32 q0, q1
174 %0 = zext i16 %p to i32
175 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
176 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (0, 0, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovntt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
180 define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
181 ; CHECK-LABEL: test_vqmovntq_m_s16:
182 ; CHECK: @ %bb.0: @ %entry
183 ; CHECK-NEXT: vmsr p0, r0
185 ; CHECK-NEXT: vqmovntt.s16 q0, q1
188 %0 = zext i16 %p to i32
189 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
190 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (0, 0, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovntt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
194 define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
195 ; CHECK-LABEL: test_vqmovntq_m_s32:
196 ; CHECK: @ %bb.0: @ %entry
197 ; CHECK-NEXT: vmsr p0, r0
199 ; CHECK-NEXT: vqmovntt.s32 q0, q1
202 %0 = zext i16 %p to i32
203 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
204 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (1, 1, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovntt.u16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
208 define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
209 ; CHECK-LABEL: test_vqmovntq_m_u16:
210 ; CHECK: @ %bb.0: @ %entry
211 ; CHECK-NEXT: vmsr p0, r0
213 ; CHECK-NEXT: vqmovntt.u16 q0, q1
216 %0 = zext i16 %p to i32
217 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
218 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (1, 1, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovntt.u32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
222 define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
223 ; CHECK-LABEL: test_vqmovntq_m_u32:
224 ; CHECK: @ %bb.0: @ %entry
225 ; CHECK-NEXT: vmsr p0, r0
227 ; CHECK-NEXT: vqmovntt.u32 q0, q1
230 %0 = zext i16 %p to i32
231 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
232 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (1, 0, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovunbt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
236 define arm_aapcs_vfpcc <16 x i8> @test_vqmovunbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
237 ; CHECK-LABEL: test_vqmovunbq_m_s16:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vmsr p0, r0
241 ; CHECK-NEXT: vqmovunbt.s16 q0, q1
244 %0 = zext i16 %p to i32
245 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
246 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (1, 0, 0).
; The assertions visible here expect vmsr p0, r0 and vqmovunbt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
250 define arm_aapcs_vfpcc <8 x i16> @test_vqmovunbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
251 ; CHECK-LABEL: test_vqmovunbq_m_s32:
252 ; CHECK: @ %bb.0: @ %entry
253 ; CHECK-NEXT: vmsr p0, r0
255 ; CHECK-NEXT: vqmovunbt.s32 q0, q1
258 %0 = zext i16 %p to i32
259 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
260 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> by
; @llvm.arm.mve.pred.i2v.v8i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1 with i32 flags (1, 0, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovuntt.s16 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
264 define arm_aapcs_vfpcc <16 x i8> @test_vqmovuntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
265 ; CHECK-LABEL: test_vqmovuntq_m_s16:
266 ; CHECK: @ %bb.0: @ %entry
267 ; CHECK-NEXT: vmsr p0, r0
269 ; CHECK-NEXT: vqmovuntt.s16 q0, q1
272 %0 = zext i16 %p to i32
273 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
274 %2 = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> by
; @llvm.arm.mve.pred.i2v.v4i1, and passed as the last operand of
; @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1 with i32 flags (1, 0, 1).
; The assertions visible here expect vmsr p0, r0 and vqmovuntt.s32 q0, q1.
; NOTE(review): flag order is presumably (unsigned-result, unsigned-source, top-half) -- confirm against the intrinsic definition.
278 define arm_aapcs_vfpcc <8 x i16> @test_vqmovuntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
279 ; CHECK-LABEL: test_vqmovuntq_m_s32:
280 ; CHECK: @ %bb.0: @ %entry
281 ; CHECK-NEXT: vmsr p0, r0
283 ; CHECK-NEXT: vqmovuntt.s32 q0, q1
286 %0 = zext i16 %p to i32
287 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
288 %2 = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %1)
; Declarations of the intrinsics called by the test functions in this file:
; the i32 -> predicate-vector conversions (pred.i2v, for <8 x i1> and <4 x i1>),
; the unpredicated vqmovn forms (two vectors plus three i32 operands), and the
; predicated vqmovn forms (same operands plus a trailing predicate vector).
292 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
293 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
295 declare <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32)
296 declare <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32)
298 declare <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, i32, i32, <8 x i1>)
299 declare <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, i32, i32, <4 x i1>)