1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
4 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
5 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
7 declare <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half>, <4 x float>, i32)
8 declare <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half>, <4 x float>, i32, <4 x i1>)
9 declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32)
10 declare <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float>, <8 x half>, i32, <4 x i1>)
12 declare <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32, <8 x i16>, i32)
13 declare <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32, <4 x i32>, i32)
14 declare <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32, <8 x half>, i32)
15 declare <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32, <4 x float>, i32)
16 declare <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32, <8 x half>, <8 x i16>, i32, <8 x i1>)
17 declare <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32, <4 x float>, <4 x i32>, i32, <4 x i1>)
18 declare <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, i32, <8 x i1>)
19 declare <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, i32, <4 x i1>)
; Unpredicated llvm.arm.mve.vcvt.narrow with trailing i32 operand 1;
; the expected instruction is vcvtt.f16.f32.
define arm_aapcs_vfpcc <8 x half> @test_vcvttq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvttq_f16_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtt.f16.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 1)
  ret <8 x half> %0
}
; Unpredicated llvm.arm.mve.vcvt.narrow with trailing i32 operand 0;
; the expected instruction is vcvtb.f16.f32.
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvtbq_f16_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtb.f16.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 0)
  ret <8 x half> %0
}
; Predicated llvm.arm.mve.vcvt.narrow.predicated with selector operand 1.
; The i16 predicate %p is zero-extended and converted to <4 x i1>; the
; expected code moves it into p0 and issues the conversion inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvttq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f16_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvttt.f16.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 1, <4 x i1> %1)
  ret <8 x half> %2
}
; Predicated llvm.arm.mve.vcvt.narrow.predicated with selector operand 0.
; The i16 predicate %p is zero-extended and converted to <4 x i1>; the
; expected code moves it into p0 and issues the conversion inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f16_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtbt.f16.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 0, <4 x i1> %1)
  ret <8 x half> %2
}
; Unpredicated llvm.arm.mve.vcvt.fix (v8i16 -> v8f16), first operand 0 and
; immediate 1; the expected instruction is vcvt.f16.s16 with #1.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> %a, i32 1)
  ret <8 x half> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v8i16 -> v8f16), first operand 1 and
; immediate 2; the expected instruction is vcvt.f16.u16 with #2.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> %a, i32 2)
  ret <8 x half> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v4i32 -> v4f32), first operand 0 and
; immediate 3; the expected instruction is vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> %a, i32 3)
  ret <4 x float> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v4i32 -> v4f32), first operand 1 and
; immediate 32; the expected instruction is vcvt.f32.u32 with #32.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.u32 q0, q0, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> %a, i32 32)
  ret <4 x float> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v8f16 -> v8i16), first operand 0 and
; immediate 1; the expected instruction is vcvt.s16.f16 with #1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> %a, i32 1)
  ret <8 x i16> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v8f16 -> v8i16), first operand 1 and
; immediate 2; the expected instruction is vcvt.u16.f16 with #2.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> %a, i32 2)
  ret <8 x i16> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v4f32 -> v4i32), first operand 0 and
; immediate 3; the expected instruction is vcvt.s32.f32 with #3.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s32.f32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> %a, i32 3)
  ret <4 x i32> %0
}
; Unpredicated llvm.arm.mve.vcvt.fix (v4f32 -> v4i32), first operand 1 and
; immediate 32; the expected instruction is vcvt.u32.f32 with #32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u32.f32 q0, q0, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> %a, i32 32)
  ret <4 x i32> %0
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8i16 -> v8f16) with an explicit
; %inactive operand, first operand 0 and immediate 1. The expected code loads
; the predicate into p0 and issues vcvtt.f16.s16 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f16.s16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> %inactive, <8 x i16> %a, i32 1, <8 x i1> %1)
  ret <8 x half> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8i16 -> v8f16) with an explicit
; %inactive operand, first operand 1 and immediate 2. The expected code loads
; the predicate into p0 and issues vcvtt.f16.u16 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f16.u16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> %inactive, <8 x i16> %a, i32 2, <8 x i1> %1)
  ret <8 x half> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4i32 -> v4f32) with an explicit
; %inactive operand, first operand 0 and immediate 3. The expected code loads
; the predicate into p0 and issues vcvtt.f32.s32 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f32.s32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> %inactive, <4 x i32> %a, i32 3, <4 x i1> %1)
  ret <4 x float> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4i32 -> v4f32) with an explicit
; %inactive operand, first operand 1 and immediate 32. The expected code loads
; the predicate into p0 and issues vcvtt.f32.u32 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f32.u32 q0, q1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> %inactive, <4 x i32> %a, i32 32, <4 x i1> %1)
  ret <4 x float> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8f16 -> v8i16) with an explicit
; %inactive operand, first operand 0 and immediate 1. The expected code loads
; the predicate into p0 and issues vcvtt.s16.f16 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.s16.f16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8f16 -> v8i16) with an explicit
; %inactive operand, first operand 1 and immediate 2. The expected code loads
; the predicate into p0 and issues vcvtt.u16.f16 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.u16.f16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, i32 2, <8 x i1> %1)
  ret <8 x i16> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4f32 -> v4i32) with an explicit
; %inactive operand, first operand 0 and immediate 3. The expected code loads
; the predicate into p0 and issues vcvtt.s32.f32 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.s32.f32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, i32 3, <4 x i1> %1)
  ret <4 x i32> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4f32 -> v4i32) with an explicit
; %inactive operand, first operand 1 and immediate 32. The expected code loads
; the predicate into p0 and issues vcvtt.u32.f32 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.u32.f32 q0, q1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, i32 32, <4 x i1> %1)
  ret <4 x i32> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8i16 -> v8f16) with an undef
; inactive operand, first operand 0 and immediate 1. The expected code converts
; q0 in place with vcvtt.f16.s16 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f16.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> %a, i32 1, <8 x i1> %1)
  ret <8 x half> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8i16 -> v8f16) with an undef
; inactive operand, first operand 1 and immediate 2. The expected code converts
; q0 in place with vcvtt.f16.u16 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f16.u16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> %a, i32 2, <8 x i1> %1)
  ret <8 x half> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4i32 -> v4f32) with an undef
; inactive operand, first operand 0 and immediate 3. The expected code converts
; q0 in place with vcvtt.f32.s32 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f32.s32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> %a, i32 3, <4 x i1> %1)
  ret <4 x float> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4i32 -> v4f32) with an undef
; inactive operand, first operand 1 and immediate 32. The expected code converts
; q0 in place with vcvtt.f32.u32 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.f32.u32 q0, q0, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> %a, i32 32, <4 x i1> %1)
  ret <4 x float> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8f16 -> v8i16) with an undef
; inactive operand, first operand 0 and immediate 1. The expected code converts
; q0 in place with vcvtt.s16.f16 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.s16.f16 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v8f16 -> v8i16) with an undef
; inactive operand, first operand 1 and immediate 2. The expected code converts
; q0 in place with vcvtt.u16.f16 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.u16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, i32 2, <8 x i1> %1)
  ret <8 x i16> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4f32 -> v4i32) with an undef
; inactive operand, first operand 0 and immediate 3. The expected code converts
; q0 in place with vcvtt.s32.f32 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.s32.f32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, i32 3, <4 x i1> %1)
  ret <4 x i32> %2
}
; Predicated llvm.arm.mve.vcvt.fix.predicated (v4f32 -> v4i32) with an undef
; inactive operand, first operand 1 and immediate 32. The expected code converts
; q0 in place with vcvtt.u32.f32 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtt.u32.f32 q0, q0, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, i32 32, <4 x i1> %1)
  ret <4 x i32> %2
}
; Unpredicated llvm.arm.mve.vcvt.widen with trailing i32 operand 0;
; the expected instruction is vcvtb.f32.f16.
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtbq_f32_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtb.f32.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
  ret <4 x float> %0
}
; Unpredicated llvm.arm.mve.vcvt.widen with trailing i32 operand 1;
; the expected instruction is vcvtt.f32.f16.
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvttq_f32_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtt.f32.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 1)
  ret <4 x float> %0
}
; Predicated llvm.arm.mve.vcvt.widen.predicated with an explicit %inactive
; operand and selector operand 0. The expected code loads the predicate into
; p0 and issues vcvtbt.f32.f16 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f32_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtbt.f32.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 0, <4 x i1> %1)
  ret <4 x float> %2
}
; Predicated llvm.arm.mve.vcvt.widen.predicated with an explicit %inactive
; operand and selector operand 1. The expected code loads the predicate into
; p0 and issues vcvttt.f32.f16 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f32_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvttt.f32.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 1, <4 x i1> %1)
  ret <4 x float> %2
}