; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Splat of a scalar i32 into all four lanes, written as an insertelement into
; lane 0 followed by a zero-mask shufflevector. The checks expect a single
; vdup.32 from r0.
define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
; CHECK-LABEL: vdup_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <4 x i32> undef, i32 %src, i32 0
  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %out
}
; Splat of a scalar i16 into all eight lanes (insertelement into lane 0 plus a
; zero-mask shufflevector). The checks expect a single vdup.16 from r0.
define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
; CHECK-LABEL: vdup_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <8 x i16> undef, i16 %src, i32 0
  %out = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %out
}
; Splat of a scalar i8 into all sixteen lanes (insertelement into lane 0 plus a
; zero-mask shufflevector). The checks expect a single vdup.8 from r0.
define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
; CHECK-LABEL: vdup_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <16 x i8> undef, i8 %src, i32 0
  %out = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %out
}
; Splat of a scalar i64 into both lanes. The checks do not expect a vdup here:
; q0 is built from four 32-bit lane inserts, alternating r0 and r1.
define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
; CHECK-LABEL: vdup_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], r0
; CHECK-NEXT:    vmov.32 q0[3], r1
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <2 x i64> undef, i64 %src, i32 0
  %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %out
}
; Splat of a float argument into all four lanes. The checks expect the value to
; be moved from s0 into r0 and then broadcast with vdup.32.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
; CHECK-LABEL: vdup_f32_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <4 x float> undef, float %src, i32 0
  %out = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %out
}
; Splat of a computed float (the sum of the two arguments) rather than a plain
; argument. The checks expect the scalar vadd.f32 result to be moved into r0
; and then broadcast with vdup.32.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
; CHECK-LABEL: vdup_f32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.f32 s0, s0, s1
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = fadd float %src1, %src2
  %1 = insertelement <4 x float> undef, float %0, i32 0
  %out = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %out
}
; TODO: Calling convention needs fixing to pass half types directly to functions
; Until then the two half operands are loaded through pointers. Their sum (%2)
; is the value that gets splatted into all eight lanes; the checks expect
; vadd.f16, a move to r0 and a vdup.16.
define arm_aapcs_vfpcc <8 x half> @vdup_f16(half* %src1, half* %src2) {
; CHECK-LABEL: vdup_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vadd.f16 s0, s2, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = load half, half *%src1, align 2
  %1 = load half, half *%src2, align 2
  ; %0 + %1 matches the checked "vadd.f16 s0, s2, s0" (s2 <- [r0], s0 <- [r1]).
  %2 = fadd half %0, %1
  %3 = insertelement <8 x half> undef, half %2, i32 0
  %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %out
}
; Splat of a double argument into both lanes. The checks expect no vdup: the
; output copies s0/s1 into s2/s3 with two vmov.f32 instructions.
define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
; CHECK-LABEL: vdup_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    vmov.f32 s2, s0
; CHECK-NEXT:    vmov.f32 s3, s1
; CHECK-NEXT:    bx lr
entry:
  %0 = insertelement <2 x double> undef, double %src, i32 0
  %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %out
}
; Broadcast of lane 3 of a <4 x i32> vector (shuffle mask is all 3s). The
; checks expect the lane to be extracted to r0 and then splatted with vdup.32.
define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
; CHECK-LABEL: vduplane_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 r0, q0[3]
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %out
}
; Broadcast of lane 3 of an <8 x i16> vector (shuffle mask is all 3s). The
; checks expect a vmov.u16 lane extract to r0 followed by vdup.16.
define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
; CHECK-LABEL: vduplane_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %out
}
; Broadcast of lane 3 of a <16 x i8> vector (shuffle mask is all 3s). The
; checks expect a vmov.u8 lane extract to r0 followed by vdup.8.
define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
; CHECK-LABEL: vduplane_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[3]
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %out
}
; Broadcast of lane 1 of a <2 x i64> vector (shuffle mask <1, 1>). The checks
; expect no vdup: s2/s3 are copied into s0/s1 with two vmov.f32 instructions.
define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
; CHECK-LABEL: vduplane_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %out
}
; Broadcast of lane 3 of a <4 x float> vector (shuffle mask is all 3s). The
; checks expect the same sequence as the i32 case: lane extract to r0, then
; vdup.32.
define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
; CHECK-LABEL: vduplane_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 r0, q0[3]
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x float> %out
}
; Broadcast of lane 3 of an <8 x half> vector (shuffle mask is all 3s). The
; checks expect the same sequence as the i16 case: vmov.u16 lane extract to
; r0, then vdup.16.
define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
; CHECK-LABEL: vduplane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x half> %out
}
; Broadcast of lane 1 of a <2 x double> vector (shuffle mask <1, 1>). The
; checks expect no vdup: s2/s3 are copied into s0/s1 with two vmov.f32
; instructions.
define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
; CHECK-LABEL: vduplane_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %out
}