1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Splat of a 32-bit integer scalar: %src is inserted into lane 0 of an undef
; vector, then a shufflevector with an all-zero mask copies that lane into
; every element. The expected assembly is a single vdup.32 from r0.
5 define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
6 ; CHECK-LABEL: vdup_i32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vdup.32 q0, r0
11 %0 = insertelement <4 x i32> undef, i32 %src, i32 0
12 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
; Splat of a 16-bit integer scalar into all eight lanes (insertelement into
; lane 0 followed by an all-zero shuffle mask). The expected assembly is a
; single vdup.16 from r0.
16 define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
17 ; CHECK-LABEL: vdup_i16:
18 ; CHECK: @ %bb.0: @ %entry
19 ; CHECK-NEXT: vdup.16 q0, r0
22 %0 = insertelement <8 x i16> undef, i16 %src, i32 0
23 %out = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of an 8-bit integer scalar into all sixteen lanes (insertelement into
; lane 0 followed by an all-zero shuffle mask). The expected assembly is a
; single vdup.8 from r0.
27 define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
28 ; CHECK-LABEL: vdup_i8:
29 ; CHECK: @ %bb.0: @ %entry
30 ; CHECK-NEXT: vdup.8 q0, r0
33 %0 = insertelement <16 x i8> undef, i8 %src, i32 0
34 %out = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
; Splat of a 64-bit integer scalar into both lanes of a <2 x i64>. No vdup is
; expected here: the assertions expect two dual-lane vmov instructions, one
; writing r0 into 32-bit lanes 0 and 2 and one writing r1 into lanes 1 and 3.
38 define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
39 ; CHECK-LABEL: vdup_i64:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r0
42 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r1
45 %0 = insertelement <2 x i64> undef, i64 %src, i32 0
46 %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
; Splat of a float argument into all four lanes. The expected assembly first
; moves the value from s0 into r0 and then issues vdup.32 from r0.
50 define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
51 ; CHECK-LABEL: vdup_f32_1:
52 ; CHECK: @ %bb.0: @ %entry
53 ; CHECK-NEXT: vmov r0, s0
54 ; CHECK-NEXT: vdup.32 q0, r0
57 %0 = insertelement <4 x float> undef, float %src, i32 0
58 %out = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
; Splat of a computed float: the two arguments are added and the sum is
; broadcast to all four lanes. The expected assembly is vadd.f32 on s0/s1,
; a move of the result from s0 into r0, then vdup.32 from r0.
62 define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
63 ; CHECK-LABEL: vdup_f32_2:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vadd.f32 s0, s0, s1
66 ; CHECK-NEXT: vmov r0, s0
67 ; CHECK-NEXT: vdup.32 q0, r0
70 %0 = fadd float %src1, %src2
71 %1 = insertelement <4 x float> undef, float %0, i32 0
72 %out = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
; Variant of the float splat where the broadcast is performed on the integer
; view: %src is bitcast to i32, splatted as <4 x i32>, and the vector is
; bitcast back to <4 x float>. The expected assembly is the same vmov + vdup.32
; pair that vdup_f32_1 expects.
76 define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
77 ; CHECK-LABEL: vdup_f32_1bc:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmov r0, s0
80 ; CHECK-NEXT: vdup.32 q0, r0
83 %srcbc = bitcast float %src to i32
84 %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
85 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
86 %outbc = bitcast <4 x i32> %out to <4 x float>
87 ret <4 x float> %outbc
; Computed-float splat routed through integer types: the fadd result is
; bitcast to i32, splatted as <4 x i32>, and the vector is bitcast back to
; <4 x float>. The expected assembly matches vdup_f32_2: vadd.f32, a move of
; s0 into r0, then vdup.32.
90 define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
91 ; CHECK-LABEL: vdup_f32_2bc:
92 ; CHECK: @ %bb.0: @ %entry
93 ; CHECK-NEXT: vadd.f32 s0, s0, s1
94 ; CHECK-NEXT: vmov r0, s0
95 ; CHECK-NEXT: vdup.32 q0, r0
98 %0 = fadd float %src1, %src2
99 %bc = bitcast float %0 to i32
100 %1 = insertelement <4 x i32> undef, i32 %bc, i32 0
101 %out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
102 %outbc = bitcast <4 x i32> %out to <4 x float>
103 ret <4 x float> %outbc
; Splat of a computed half: the two half arguments are added and the sum is
; broadcast to all eight lanes. The expected assembly is vadd.f16 on s0/s1,
; vmov.f16 of the result into r0, then vdup.16 from r0.
106 define arm_aapcs_vfpcc <8 x half> @vdup_f16(half %0, half %1) {
107 ; CHECK-LABEL: vdup_f16:
108 ; CHECK: @ %bb.0: @ %entry
109 ; CHECK-NEXT: vadd.f16 s0, s0, s1
110 ; CHECK-NEXT: vmov.f16 r0, s0
111 ; CHECK-NEXT: vdup.16 q0, r0
114 %2 = fadd half %0, %1
115 %3 = insertelement <8 x half> undef, half %2, i32 0
116 %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
; Computed-half splat routed through integer types: the fadd result is
; bitcast to i16, splatted as <8 x i16>, and the vector is bitcast back to
; <8 x half>. The expected assembly is the same three-instruction sequence
; that vdup_f16 expects (vadd.f16, vmov.f16 into r0, vdup.16).
120 define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half %0, half %1) {
121 ; CHECK-LABEL: vdup_f16_bc:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vadd.f16 s0, s0, s1
124 ; CHECK-NEXT: vmov.f16 r0, s0
125 ; CHECK-NEXT: vdup.16 q0, r0
128 %2 = fadd half %0, %1
129 %bc = bitcast half %2 to i16
130 %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
131 %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
132 %outbc = bitcast <8 x i16> %out to <8 x half>
133 ret <8 x half> %outbc
; Splat of a double argument into both lanes of a <2 x double>. No vdup is
; expected: the assertions expect two vmov.f32 register copies, s0 -> s2 and
; s1 -> s3.
; NOTE(review): presumably s0/s1 hold the incoming double and s2/s3 form the
; second 64-bit lane of q0 - confirm against the register layout.
136 define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
137 ; CHECK-LABEL: vdup_f64:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vmov.f32 s2, s0
140 ; CHECK-NEXT: vmov.f32 s3, s1
143 %0 = insertelement <2 x double> undef, double %src, i32 0
144 %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
145 ret <2 x double> %out
; Lane broadcast: every element of the result selects element 3 of %src
; (shuffle mask is all 3s). The expected assembly reads that lane via
; vmov r0, s3 and then issues vdup.32 from r0.
150 define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
151 ; CHECK-LABEL: vduplane_i32:
152 ; CHECK: @ %bb.0: @ %entry
153 ; CHECK-NEXT: vmov r0, s3
154 ; CHECK-NEXT: vdup.32 q0, r0
157 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; Lane broadcast of element 3 of an <8 x i16> (shuffle mask is all 3s). The
; expected assembly extracts the lane with vmov.u16 r0, q0[3] and then issues
; vdup.16 from r0.
161 define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
162 ; CHECK-LABEL: vduplane_i16:
163 ; CHECK: @ %bb.0: @ %entry
164 ; CHECK-NEXT: vmov.u16 r0, q0[3]
165 ; CHECK-NEXT: vdup.16 q0, r0
168 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Lane broadcast of element 3 of a <16 x i8> (shuffle mask is all 3s). The
; expected assembly extracts the lane with vmov.u8 r0, q0[3] and then issues
; vdup.8 from r0.
172 define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
173 ; CHECK-LABEL: vduplane_i8:
174 ; CHECK: @ %bb.0: @ %entry
175 ; CHECK-NEXT: vmov.u8 r0, q0[3]
176 ; CHECK-NEXT: vdup.8 q0, r0
179 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Lane broadcast of element 1 of a <2 x i64> (shuffle mask is <1, 1>). No vdup
; is expected: the assertions expect two vmov.f32 register copies, s2 -> s0
; and s3 -> s1.
183 define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
184 ; CHECK-LABEL: vduplane_i64:
185 ; CHECK: @ %bb.0: @ %entry
186 ; CHECK-NEXT: vmov.f32 s0, s2
187 ; CHECK-NEXT: vmov.f32 s1, s3
190 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Lane broadcast of element 3 of a <4 x float> (shuffle mask is all 3s). The
; expected assembly is the same as for the i32 variant: vmov r0, s3 followed
; by vdup.32 from r0.
194 define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
195 ; CHECK-LABEL: vduplane_f32:
196 ; CHECK: @ %bb.0: @ %entry
197 ; CHECK-NEXT: vmov r0, s3
198 ; CHECK-NEXT: vdup.32 q0, r0
201 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; Lane broadcast of element 3 of an <8 x half> (shuffle mask is all 3s). The
; expected assembly is the same as for the i16 variant: vmov.u16 r0, q0[3]
; followed by vdup.16 from r0.
205 define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
206 ; CHECK-LABEL: vduplane_f16:
207 ; CHECK: @ %bb.0: @ %entry
208 ; CHECK-NEXT: vmov.u16 r0, q0[3]
209 ; CHECK-NEXT: vdup.16 q0, r0
212 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Lane broadcast of element 1 of a <2 x double> (shuffle mask is <1, 1>). The
; expected assembly is the same pair of register copies as the i64 variant:
; vmov.f32 s0, s2 and vmov.f32 s1, s3.
216 define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
217 ; CHECK-LABEL: vduplane_f64:
218 ; CHECK: @ %bb.0: @ %entry
219 ; CHECK-NEXT: vmov.f32 s0, s2
220 ; CHECK-NEXT: vmov.f32 s1, s3
223 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
224 ret <2 x double> %out
; Splat followed by a scalar extract: %src is bitcast to i32, broadcast to a
; <4 x i32>, bitcast back to <4 x float>, and element 2 is extracted.
; NOTE(review): no instruction-level assertions are visible for this function
; in this view; presumably the splat + extract is expected to fold back to the
; incoming value - confirm against the full test file.
228 define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
229 ; CHECK-LABEL: vdup_f32_extract:
230 ; CHECK: @ %bb.0: @ %entry
233 %srcbc = bitcast float %src to i32
234 %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
235 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
236 %outbc = bitcast <4 x i32> %out to <4 x float>
237 %ext = extractelement <4 x float> %outbc, i32 2
; Splat followed by a scalar extract on half: the two arguments are added, the
; sum is bitcast to i16, broadcast to an <8 x i16>, bitcast back to <8 x half>,
; and element 2 is extracted.
; NOTE(review): the only instruction assertion visible in this view is the
; vadd.f16; presumably no vdup or lane move is expected after it - confirm
; against the full test file.
241 define arm_aapcs_vfpcc half @vdup_f16_extract(half %0, half %1) {
242 ; CHECK-LABEL: vdup_f16_extract:
243 ; CHECK: @ %bb.0: @ %entry
244 ; CHECK-NEXT: vadd.f16 s0, s0, s1
247 %2 = fadd half %0, %1
248 %bc = bitcast half %2 to i16
249 %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
250 %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
251 %outbc = bitcast <8 x i16> %out to <8 x half>
252 %ext = extractelement <8 x half> %outbc, i32 2