1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
4 declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) #2
5 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) #2
; @test1: narrows %a to four half-precision lanes through the vcvtfp2hf
; intrinsic, then builds an <8 x half> whose lanes 0-3 are that result and
; whose lanes 4-7 are taken from the zeroinitializer operand of the shuffle.
; The assertions below expect the fcvtn to directly follow the block label.
; NOTE(review): presumably the point is that no separate instruction is needed
; to zero the upper 64 bits of the result register - confirm with the test's
; originating change.
7 define <8 x half> @test1(<4 x float> noundef %a) {
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
13 %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
14 %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
; Mask indices 0-3 pick the converted lanes; 4-7 pick lanes of the zero vector.
15 %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16 ret <8 x half> %shuffle.i
; @test2: loads eight i16 lanes from %in, shifts each right by 4 and truncates
; to i8, widens the <8 x i8> to <16 x i8> with zero upper lanes, and uses that
; as the table operand of the tbl1 intrinsic indexed by %idx.
; The assertions below expect ldr, shrn and tbl on consecutive lines.
; NOTE(review): the IR body is identical to @tbl1v8i8 later in this file; the
; two functions look like duplicates - confirm whether both are needed.
19 define <8 x i8> @test2(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
21 ; CHECK: // %bb.0: // %entry
22 ; CHECK-NEXT: ldr q1, [x0]
23 ; CHECK-NEXT: shrn v1.8b, v1.8h, #4
24 ; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
27 %0 = load <8 x i16>, ptr %in, align 2
28 %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
29 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
; Mask indices 0-7 pick the narrowed lanes; 8-15 pick lanes of the zero vector.
30 %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
31 %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
32 ret <8 x i8> %vtbl11.i
; @tbl1v8i8: loads eight i16 lanes from %in, shifts each right by 4 and
; truncates to i8, widens the <8 x i8> to <16 x i8> with zero upper lanes, and
; feeds that as the table operand of the tbl1 intrinsic indexed by %idx.
; The assertions below expect ldr, shrn and tbl on consecutive lines.
; NOTE(review): the IR body is identical to @test2 earlier in this file; the
; two functions look like duplicates - confirm whether both are needed.
35 define <8 x i8> @tbl1v8i8(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
36 ; CHECK-LABEL: tbl1v8i8:
37 ; CHECK: // %bb.0: // %entry
38 ; CHECK-NEXT: ldr q1, [x0]
39 ; CHECK-NEXT: shrn v1.8b, v1.8h, #4
40 ; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
43 %0 = load <8 x i16>, ptr %in, align 2
44 %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
45 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
; Mask indices 0-7 pick the narrowed lanes; 8-15 pick lanes of the zero vector.
46 %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
47 %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
48 ret <8 x i8> %vtbl11.i
; @addpv4i16: applies the addp.v4i16 intrinsic to %a and %b, then widens the
; <4 x i16> result to <8 x i16> with lanes 4-7 taken from a zero vector.
; The assertions below expect the addp to directly follow the block label.
; NOTE(review): presumably checks that the 64-bit addp result needs no extra
; instruction to clear the upper half - confirm.
51 define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
52 ; CHECK-LABEL: addpv4i16:
53 ; CHECK: // %bb.0: // %entry
54 ; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
57 %vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
; Mask indices 0-3 pick the addp lanes; 4-7 pick lanes of the zero vector.
58 %shuffle.i = shufflevector <4 x i16> %vpadd_v2.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
59 ret <8 x i16> %shuffle.i
; @addv4i16: adds %b and %a with a plain IR vector add (no intrinsic), then
; widens the <4 x i16> sum to <8 x i16> with lanes 4-7 taken from a zero vector.
; The assertions below expect the add to directly follow the block label, with
; operands in the same %b, %a order as the IR.
62 define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
63 ; CHECK-LABEL: addv4i16:
64 ; CHECK: // %bb.0: // %entry
65 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
68 %add.i = add <4 x i16> %b, %a
; Mask indices 0-3 pick the sum lanes; 4-7 pick lanes of the zero vector.
69 %shuffle.i = shufflevector <4 x i16> %add.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
70 ret <8 x i16> %shuffle.i
; @rshrn: applies the rshrn.v8i8 intrinsic to %a with an immediate of 3, then
; widens the <8 x i8> result to <16 x i8> with lanes 8-15 taken from a zero
; vector. Parameter %b is not referenced by the visible body.
; The assertions below expect the rshrn to directly follow the block label.
73 define <16 x i8> @rshrn(<8 x i16> noundef %a, <4 x i16> noundef %b) {
75 ; CHECK: // %bb.0: // %entry
76 ; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
79 %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a, i32 3)
; Mask indices 0-7 pick the narrowed lanes; 8-15 pick lanes of the zero vector.
80 %shuffle.i = shufflevector <8 x i8> %vrshrn_n1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
81 ret <16 x i8> %shuffle.i
; @tbl1: calls the tbl1.v8i8 intrinsic with table %a and indices %b, then
; widens the <8 x i8> result to <16 x i8> with lanes 8-15 taken from a zero
; vector. Here the widening is applied to the lookup result, not to the table.
; The assertions below expect the tbl to directly follow the block label.
84 define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
86 ; CHECK: // %bb.0: // %entry
87 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
90 %vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
; Mask indices 0-7 pick the lookup lanes; 8-15 pick lanes of the zero vector.
91 %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
92 ret <16 x i8> %shuffle.i
; @fadd: adds two scalar doubles and builds a <2 x double> with the sum in
; lane 0 and 0.0 in lane 1, using insertelement rather than a shuffle.
; Unlike the vector cases in this file, the assertions below expect explicit
; zeroing: a movi of a zero vector, the scalar fadd, a lane-0 element move into
; the zero vector, and a full-register copy back into v0.
95 define <2 x double> @fadd(double noundef %x, double noundef %y) {
97 ; CHECK: // %bb.0: // %entry
98 ; CHECK-NEXT: movi v2.2d, #0000000000000000
99 ; CHECK-NEXT: fadd d0, d0, d1
100 ; CHECK-NEXT: mov v2.d[0], v0.d[0]
101 ; CHECK-NEXT: mov v0.16b, v2.16b
104 %add = fadd double %x, %y
; Lane 0 receives the sum; lane 1 is then set to the constant 0.0.
105 %vecinit1 = insertelement <2 x double> poison, double %add, i64 0
106 %vecinit2 = insertelement <2 x double> %vecinit1, double 0.0, i64 1
107 ret <2 x double> %vecinit2
; @bsl: computes the bitwise select (%c & %a) | (~%a & %d) on <4 x i16>
; values, reinterprets the result as <8 x i8>, and widens it to <16 x i8> with
; lanes 8-15 taken from a zero vector. Parameter %b is not referenced by the
; visible body.
; The assertions below expect a single bsl to directly follow the block label.
110 define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
112 ; CHECK: // %bb.0: // %entry
113 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
; Bits of %c where the mask %a is set.
116 %vbsl3.i = and <4 x i16> %c, %a
; xor with all-ones inverts the mask.
117 %0 = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
; Bits of %d where the mask %a is clear.
118 %vbsl4.i = and <4 x i16> %0, %d
119 %vbsl5.i = or <4 x i16> %vbsl4.i, %vbsl3.i
120 %1 = bitcast <4 x i16> %vbsl5.i to <8 x i8>
; Mask indices 0-7 pick the selected bytes; 8-15 pick lanes of the zero vector.
121 %shuffle.i = shufflevector <8 x i8> %1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
122 ret <16 x i8> %shuffle.i
; @load: loads an <8 x i8> from %a and widens it to <16 x i8> with lanes 8-15
; taken from a zero vector. Parameter %b is not referenced by the visible body.
; The assertions below expect a single 64-bit ldr into d0 to directly follow
; the block label.
125 define <16 x i8> @load(ptr %a, <8 x i8> %b) {
127 ; CHECK: // %bb.0: // %entry
128 ; CHECK-NEXT: ldr d0, [x0]
131 %vtbl11 = load <8 x i8>, ptr %a
; Mask indices 0-7 pick the loaded bytes; 8-15 pick lanes of the zero vector.
132 %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
133 ret <16 x i8> %shuffle.i
137 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
138 declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)