1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated <16 x i8> case: the IR inverts %b (xor with an all-ones vector)
; and ORs the result with %a. The autogenerated assertions expect that pair to
; be emitted as `vorn q0, q0, q1`.
4 define arm_aapcs_vfpcc <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
5 ; CHECK-LABEL: test_vornq_u8:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vorn q0, q0, q1
10 %0 = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
11 %1 = or <16 x i8> %0, %a
; Unpredicated <8 x i16> case: the IR inverts %b (xor with an all-ones vector)
; and ORs the result with %a. The autogenerated assertions expect that pair to
; be emitted as `vorn q0, q0, q1`.
15 define arm_aapcs_vfpcc <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
16 ; CHECK-LABEL: test_vornq_s16:
17 ; CHECK: @ %bb.0: @ %entry
18 ; CHECK-NEXT: vorn q0, q0, q1
21 %0 = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
22 %1 = or <8 x i16> %0, %a
; Unpredicated <4 x i32> case: the IR inverts %b (xor with an all-ones vector)
; and ORs the result with %a. The autogenerated assertions expect that pair to
; be emitted as `vorn q0, q0, q1`.
26 define arm_aapcs_vfpcc <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
27 ; CHECK-LABEL: test_vornq_u32:
28 ; CHECK: @ %bb.0: @ %entry
29 ; CHECK-NEXT: vorn q0, q0, q1
32 %0 = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
33 %1 = or <4 x i32> %0, %a
; Unpredicated <4 x float> case: both float operands are bitcast to <4 x i32>,
; %b's bits are inverted (xor with all-ones) and ORed with %a's bits, and the
; result is bitcast back to <4 x float>. The autogenerated assertions expect
; the same `vorn q0, q0, q1` as the integer variants in this file.
37 define arm_aapcs_vfpcc <4 x float> @test_vornq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
38 ; CHECK-LABEL: test_vornq_f32:
39 ; CHECK: @ %bb.0: @ %entry
40 ; CHECK-NEXT: vorn q0, q0, q1
43 %0 = bitcast <4 x float> %a to <4 x i32>
44 %1 = bitcast <4 x float> %b to <4 x i32>
45 %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
46 %3 = or <4 x i32> %2, %0
47 %4 = bitcast <4 x i32> %3 to <4 x float>
; Predicated (merging) <16 x i8> case: the i16 %p is zero-extended to i32 and
; turned into a <16 x i1> by llvm.arm.mve.pred.i2v, then passed to
; llvm.arm.mve.orn.predicated together with %a, %b and %inactive as the final
; operand. The autogenerated assertions expect `vmsr p0, r0` and a
; `vornt q0, q1, q2`.
51 define arm_aapcs_vfpcc <16 x i8> @test_vornq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
52 ; CHECK-LABEL: test_vornq_m_s8:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: vmsr p0, r0
56 ; CHECK-NEXT: vornt q0, q1, q2
59 %0 = zext i16 %p to i32
60 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
61 %2 = tail call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
; Intrinsics called by the predicated <16 x i8> tests in this file:
; pred.i2v takes an i32 and yields a <16 x i1>; orn.predicated takes two
; <16 x i8> operands, a <16 x i1>, and a final <16 x i8> (the tests pass either
; %inactive or undef there).
65 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
67 declare <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
; Predicated (merging) <8 x i16> case: the i16 %p is zero-extended to i32 and
; turned into an <8 x i1> by llvm.arm.mve.pred.i2v, then passed to
; llvm.arm.mve.orn.predicated together with %a, %b and %inactive as the final
; operand. The autogenerated assertions expect `vmsr p0, r0` and a
; `vornt q0, q1, q2`.
69 define arm_aapcs_vfpcc <8 x i16> @test_vornq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
70 ; CHECK-LABEL: test_vornq_m_u16:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: vmsr p0, r0
74 ; CHECK-NEXT: vornt q0, q1, q2
77 %0 = zext i16 %p to i32
78 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
79 %2 = tail call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
; Intrinsics called by the predicated <8 x i16> and <8 x half> tests in this
; file: pred.i2v takes an i32 and yields an <8 x i1>; orn.predicated takes two
; <8 x i16> operands, an <8 x i1>, and a final <8 x i16> (the tests pass either
; an inactive value or undef there).
83 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
85 declare <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
; Predicated (merging) <4 x i32> case: the i16 %p is zero-extended to i32 and
; turned into a <4 x i1> by llvm.arm.mve.pred.i2v, then passed to
; llvm.arm.mve.orn.predicated together with %a, %b and %inactive as the final
; operand. The autogenerated assertions expect `vmsr p0, r0` and a
; `vornt q0, q1, q2`.
87 define arm_aapcs_vfpcc <4 x i32> @test_vornq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
88 ; CHECK-LABEL: test_vornq_m_s32:
89 ; CHECK: @ %bb.0: @ %entry
90 ; CHECK-NEXT: vmsr p0, r0
92 ; CHECK-NEXT: vornt q0, q1, q2
95 %0 = zext i16 %p to i32
96 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
97 %2 = tail call <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
; Intrinsics called by the predicated <4 x i32> and <4 x float> tests in this
; file: pred.i2v takes an i32 and yields a <4 x i1>; orn.predicated takes two
; <4 x i32> operands, a <4 x i1>, and a final <4 x i32> (the tests pass either
; %inactive or undef there).
101 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
103 declare <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
; Predicated (merging) <8 x half> case: %a, %b and %inactive are each bitcast
; to <8 x i16> so the integer llvm.arm.mve.orn.predicated.v8i16.v8i1 intrinsic
; can be used; the predicate comes from zero-extending %p and converting it
; with llvm.arm.mve.pred.i2v.v8i1. The intrinsic result is bitcast back to
; <8 x half>. The autogenerated assertions expect `vmsr p0, r0` and a
; `vornt q0, q1, q2`, as for the integer merging tests in this file.
105 define arm_aapcs_vfpcc <8 x half> @test_vornq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
106 ; CHECK-LABEL: test_vornq_m_f16:
107 ; CHECK: @ %bb.0: @ %entry
108 ; CHECK-NEXT: vmsr p0, r0
110 ; CHECK-NEXT: vornt q0, q1, q2
113 %0 = bitcast <8 x half> %a to <8 x i16>
114 %1 = bitcast <8 x half> %b to <8 x i16>
115 %2 = zext i16 %p to i32
116 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
117 %4 = bitcast <8 x half> %inactive to <8 x i16>
118 %5 = tail call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> %0, <8 x i16> %1, <8 x i1> %3, <8 x i16> %4)
119 %6 = bitcast <8 x i16> %5 to <8 x half>
; Predicated <16 x i8> case with no inactive value: the function takes only
; %a, %b and %p, and passes undef as the final operand of
; llvm.arm.mve.orn.predicated. The predicate is built from %p via zext and
; llvm.arm.mve.pred.i2v.v16i1. The autogenerated assertions expect
; `vmsr p0, r0` and a `vornt q0, q0, q1`.
123 define arm_aapcs_vfpcc <16 x i8> @test_vornq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
124 ; CHECK-LABEL: test_vornq_x_u8:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vmsr p0, r0
128 ; CHECK-NEXT: vornt q0, q0, q1
131 %0 = zext i16 %p to i32
132 %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
133 %2 = tail call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> undef)
; Predicated <8 x i16> case with no inactive value: the function takes only
; %a, %b and %p, and passes undef as the final operand of
; llvm.arm.mve.orn.predicated. The predicate is built from %p via zext and
; llvm.arm.mve.pred.i2v.v8i1. The autogenerated assertions expect
; `vmsr p0, r0` and a `vornt q0, q0, q1`.
137 define arm_aapcs_vfpcc <8 x i16> @test_vornq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
138 ; CHECK-LABEL: test_vornq_x_s16:
139 ; CHECK: @ %bb.0: @ %entry
140 ; CHECK-NEXT: vmsr p0, r0
142 ; CHECK-NEXT: vornt q0, q0, q1
145 %0 = zext i16 %p to i32
146 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
147 %2 = tail call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> undef)
; Predicated <4 x i32> case with no inactive value: the function takes only
; %a, %b and %p, and passes undef as the final operand of
; llvm.arm.mve.orn.predicated. The predicate is built from %p via zext and
; llvm.arm.mve.pred.i2v.v4i1. The autogenerated assertions expect
; `vmsr p0, r0` and a `vornt q0, q0, q1`.
151 define arm_aapcs_vfpcc <4 x i32> @test_vornq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
152 ; CHECK-LABEL: test_vornq_x_u32:
153 ; CHECK: @ %bb.0: @ %entry
154 ; CHECK-NEXT: vmsr p0, r0
156 ; CHECK-NEXT: vornt q0, q0, q1
159 %0 = zext i16 %p to i32
160 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
161 %2 = tail call <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> undef)
; Predicated <4 x float> case: %a and %b are bitcast to <4 x i32>, the
; predicate is built from %p via zext and llvm.arm.mve.pred.i2v.v4i1, and the
; integer llvm.arm.mve.orn.predicated.v4i32.v4i1 intrinsic is called with undef
; as its final operand; the result is bitcast back to <4 x float>. The
; autogenerated assertions expect `vmsr p0, r0` and a `vornt q0, q0, q1`.
;
; NOTE(review): despite the `_m_` in its name, this function has no %inactive
; parameter and passes undef as the final intrinsic operand, so it has the same
; shape as the test_vornq_x_* functions in this file rather than the
; test_vornq_m_* ones. The intended name may have been test_vornq_x_f32 --
; confirm before renaming (the label assertion would need to change with it).
165 define arm_aapcs_vfpcc <4 x float> @test_vornq_m_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
166 ; CHECK-LABEL: test_vornq_m_f32:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: vmsr p0, r0
170 ; CHECK-NEXT: vornt q0, q0, q1
173 %0 = bitcast <4 x float> %a to <4 x i32>
174 %1 = bitcast <4 x float> %b to <4 x i32>
175 %2 = zext i16 %p to i32
176 %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
177 %4 = tail call <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> undef)
178 %5 = bitcast <4 x i32> %4 to <4 x float>