1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
; Declarations of the AArch64 NEON intrinsics called by the tests below:
; a <4 x float> -> <4 x i16> conversion and a one-register table lookup that
; takes a <16 x i8> table and an <8 x i8> index vector.
4 declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) #2
5 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) #2
; Converts %a with the vcvtfp2hf intrinsic, bitcasts the <4 x i16> result to
; <4 x half>, and widens it to <8 x half> by shuffling with a zero vector
; (mask indices 4-7 select the zero operand, so the upper four lanes are zero).
; The visible expected assembly contains only the fcvtn conversion.
7 define <8 x half> @test1(<4 x float> noundef %a) {
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
13 %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
14 %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
15 %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16 ret <8 x half> %shuffle.i
; Loads <8 x i16> from %in, shifts each lane right by 4 and truncates to
; <8 x i8>, then concatenates that with a zero vector to build the <16 x i8>
; table operand of the tbl1 intrinsic, which is indexed by %idx.
; The visible expected assembly is ldr + shrn + tbl.
19 define <8 x i8> @test2(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
21 ; CHECK: // %bb.0: // %entry
22 ; CHECK-NEXT: ldr q1, [x0]
23 ; CHECK-NEXT: shrn v1.8b, v1.8h, #4
24 ; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
27 %0 = load <8 x i16>, ptr %in, align 2
28 %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
29 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
30 %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
31 %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
32 ret <8 x i8> %vtbl11.i
; Same instruction sequence as @test2: a narrowing right shift by 4 of the
; <8 x i16> loaded from %in, concatenated with zeros to form the <16 x i8>
; table for a tbl1 lookup indexed by %idx.
; The visible expected assembly is ldr + shrn + tbl.
35 define <8 x i8> @tbl1v8i8(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
36 ; CHECK-LABEL: tbl1v8i8:
37 ; CHECK: // %bb.0: // %entry
38 ; CHECK-NEXT: ldr q1, [x0]
39 ; CHECK-NEXT: shrn v1.8b, v1.8h, #4
40 ; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
43 %0 = load <8 x i16>, ptr %in, align 2
44 %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
45 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
46 %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
47 %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
48 ret <8 x i8> %vtbl11.i
; Calls the addp intrinsic on %a and %b, then widens the <4 x i16> result to
; <8 x i16> by shuffling with a zero vector so lanes 4-7 are zero.
; The visible expected assembly contains only the addp instruction.
51 define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
52 ; CHECK-LABEL: addpv4i16:
53 ; CHECK: // %bb.0: // %entry
54 ; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
57 %vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
58 %shuffle.i = shufflevector <4 x i16> %vpadd_v2.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
59 ret <8 x i16> %shuffle.i
; Adds %b and %a lane-wise with a plain IR add, then widens the <4 x i16> sum
; to <8 x i16> by shuffling with a zero vector so lanes 4-7 are zero.
; The visible expected assembly contains only the 64-bit vector add.
62 define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
63 ; CHECK-LABEL: addv4i16:
64 ; CHECK: // %bb.0: // %entry
65 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
68 %add.i = add <4 x i16> %b, %a
69 %shuffle.i = shufflevector <4 x i16> %add.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
70 ret <8 x i16> %shuffle.i
; Calls the rshrn intrinsic on %a with shift amount 3, then widens the
; <8 x i8> result to <16 x i8> by shuffling with a zero vector so bytes 8-15
; are zero. Parameter %b is not used by the visible body.
; The visible expected assembly contains only the rshrn instruction.
73 define <16 x i8> @rshrn(<8 x i16> noundef %a, <4 x i16> noundef %b) {
75 ; CHECK: // %bb.0: // %entry
76 ; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
79 %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a, i32 3)
80 %shuffle.i = shufflevector <8 x i8> %vrshrn_n1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
81 ret <16 x i8> %shuffle.i
; Performs a tbl1 lookup in table %a with indices %b, then widens the
; <8 x i8> result to <16 x i8> by shuffling with a zero vector so bytes 8-15
; are zero.
; The visible expected assembly contains only the tbl instruction.
84 define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
86 ; CHECK: // %bb.0: // %entry
87 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
90 %vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
91 %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
92 ret <16 x i8> %shuffle.i
; Adds the scalar doubles %x and %y, places the sum in lane 0 of a
; <2 x double>, and sets lane 1 to 0.0.
; Unlike the vector cases in this file, the visible expected assembly
; materialises the zero explicitly: movi of a zero vector, the scalar fadd,
; a lane-0 insert, and a final register move.
95 define <2 x double> @fadd(double noundef %x, double noundef %y) {
97 ; CHECK: // %bb.0: // %entry
98 ; CHECK-NEXT: movi v2.2d, #0000000000000000
99 ; CHECK-NEXT: fadd d0, d0, d1
100 ; CHECK-NEXT: mov v2.d[0], v0.d[0]
101 ; CHECK-NEXT: mov v0.16b, v2.16b
104 %add = fadd double %x, %y
105 %vecinit1 = insertelement <2 x double> poison, double %add, i64 0
106 %vecinit2 = insertelement <2 x double> %vecinit1, double 0.0, i64 1
107 ret <2 x double> %vecinit2
; Builds a bitwise select with %a as the mask: (%c & %a) | (~%a & %d).
; The <4 x i16> result is bitcast to <8 x i8> and widened to <16 x i8> by
; shuffling with a zero vector so bytes 8-15 are zero.
; Parameter %b is not used by the visible body.
; The visible expected assembly contains only the bsl instruction.
110 define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
112 ; CHECK: // %bb.0: // %entry
113 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
116 %vbsl3.i = and <4 x i16> %c, %a
117 %0 = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
118 %vbsl4.i = and <4 x i16> %0, %d
119 %vbsl5.i = or <4 x i16> %vbsl4.i, %vbsl3.i
120 %1 = bitcast <4 x i16> %vbsl5.i to <8 x i8>
121 %shuffle.i = shufflevector <8 x i8> %1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
122 ret <16 x i8> %shuffle.i
; Loads <8 x i8> from %a and widens it to <16 x i8> by shuffling with a zero
; vector so bytes 8-15 are zero. Parameter %b is not used by the visible body.
; The visible expected assembly contains only a 64-bit ldr into d0.
125 define <16 x i8> @load(ptr %a, <8 x i8> %b) {
127 ; CHECK: // %bb.0: // %entry
128 ; CHECK-NEXT: ldr d0, [x0]
131 %vtbl11 = load <8 x i8>, ptr %a
132 %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
133 ret <16 x i8> %shuffle.i
; Widens the <8 x i8> argument to <16 x i8> by shuffling with a zero vector,
; so bytes 8-15 of the result are zero. The value comes straight from a
; function argument, with no producing instruction in this function.
; The visible expected assembly is a single fmov d0, d0.
137 define <16 x i8> @insertzero_v8i8(<8 x i8> %a) {
138 ; CHECK-LABEL: insertzero_v8i8:
139 ; CHECK: // %bb.0: // %entry
140 ; CHECK-NEXT: fmov d0, d0
143 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
144 ret <16 x i8> %shuffle.i
; Widens the <4 x i16> argument to <8 x i16> by shuffling with a zero vector,
; so lanes 4-7 of the result are zero.
; The visible expected assembly is a single fmov d0, d0.
147 define <8 x i16> @insertzero_v4i16(<4 x i16> %a) {
148 ; CHECK-LABEL: insertzero_v4i16:
149 ; CHECK: // %bb.0: // %entry
150 ; CHECK-NEXT: fmov d0, d0
153 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
154 ret <8 x i16> %shuffle.i
; Widens the <2 x i32> argument to <4 x i32> by shuffling with a zero vector,
; so lanes 2-3 of the result are zero.
; The visible expected assembly is a single fmov d0, d0.
157 define <4 x i32> @insertzero_v2i32(<2 x i32> %a) {
158 ; CHECK-LABEL: insertzero_v2i32:
159 ; CHECK: // %bb.0: // %entry
160 ; CHECK-NEXT: fmov d0, d0
163 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
164 ret <4 x i32> %shuffle.i
; Widens the <1 x i64> argument to <2 x i64> by shuffling with a zero vector,
; so lane 1 of the result is zero.
; The visible expected assembly is a single fmov d0, d0.
167 define <2 x i64> @insertzero_v1i64(<1 x i64> %a) {
168 ; CHECK-LABEL: insertzero_v1i64:
169 ; CHECK: // %bb.0: // %entry
170 ; CHECK-NEXT: fmov d0, d0
173 %shuffle.i = shufflevector <1 x i64> %a, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
174 ret <2 x i64> %shuffle.i
; Widens the <4 x half> argument to <8 x half> by shuffling with a zero
; vector, so lanes 4-7 of the result are zero.
; The visible expected assembly is a single fmov d0, d0.
177 define <8 x half> @insertzero_v4f16(<4 x half> %a) {
178 ; CHECK-LABEL: insertzero_v4f16:
179 ; CHECK: // %bb.0: // %entry
180 ; CHECK-NEXT: fmov d0, d0
183 %shuffle.i = shufflevector <4 x half> %a, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
184 ret <8 x half> %shuffle.i
; Widens the <4 x bfloat> argument to <8 x bfloat> by shuffling with a zero
; vector, so lanes 4-7 of the result are zero.
; The visible expected assembly is a single fmov d0, d0.
187 define <8 x bfloat> @insertzero_v4bf16(<4 x bfloat> %a) {
188 ; CHECK-LABEL: insertzero_v4bf16:
189 ; CHECK: // %bb.0: // %entry
190 ; CHECK-NEXT: fmov d0, d0
193 %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
194 ret <8 x bfloat> %shuffle.i
; Widens the <2 x float> argument to <4 x float> by shuffling with a zero
; vector, so lanes 2-3 of the result are zero.
; The visible expected assembly is a single fmov d0, d0.
197 define <4 x float> @insertzero_v2f32(<2 x float> %a) {
198 ; CHECK-LABEL: insertzero_v2f32:
199 ; CHECK: // %bb.0: // %entry
200 ; CHECK-NEXT: fmov d0, d0
203 %shuffle.i = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
204 ret <4 x float> %shuffle.i
; Widens the <1 x double> argument to <2 x double> by shuffling with a zero
; vector, so lane 1 of the result is zero.
; The visible expected assembly is a single fmov d0, d0.
207 define <2 x double> @insertzero_v1f64(<1 x double> %a) {
208 ; CHECK-LABEL: insertzero_v1f64:
209 ; CHECK: // %bb.0: // %entry
210 ; CHECK-NEXT: fmov d0, d0
213 %shuffle.i = shufflevector <1 x double> %a, <1 x double> zeroinitializer, <2 x i32> <i32 0, i32 1>
214 ret <2 x double> %shuffle.i
; Declarations of the remaining NEON intrinsics used above: rshrn, which
; takes an <8 x i16> and an i32 and returns <8 x i8> (called by @rshrn), and
; addp on two <4 x i16> operands (called by @addpv4i16).
219 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
220 declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)