1 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
; trn1 pattern on <8 x i8>: the mask takes the even-numbered lanes, pairing
; lane 2k of %v0 with lane 2k of %v1 (mask indices 8..15 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .8b.
4 ; COST-LABEL: trn1.v8i8
5 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
6 ; CODE-LABEL: trn1.v8i8
7 ; CODE: trn1 v0.8b, v0.8b, v1.8b
8 define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2], v0[4], v1[4], v0[6], v1[6].
9 %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; trn2 pattern on <8 x i8>: the mask takes the odd-numbered lanes, pairing
; lane 2k+1 of %v0 with lane 2k+1 of %v1 (mask indices 8..15 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .8b.
13 ; COST-LABEL: trn2.v8i8
14 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
15 ; CODE-LABEL: trn2.v8i8
16 ; CODE: trn2 v0.8b, v0.8b, v1.8b
17 define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3], v0[5], v1[5], v0[7], v1[7].
18 %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; trn1 pattern on <16 x i8>: even-numbered lanes of %v0 interleaved with the
; same-numbered lanes of %v1 (mask indices 16..31 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .16b.
22 ; COST-LABEL: trn1.v16i8
23 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
24 ; CODE-LABEL: trn1.v16i8
25 ; CODE: trn1 v0.16b, v0.16b, v1.16b
26 define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2], ..., v0[14], v1[14].
27 %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
; trn2 pattern on <16 x i8>: odd-numbered lanes of %v0 interleaved with the
; same-numbered lanes of %v1 (mask indices 16..31 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .16b.
31 ; COST-LABEL: trn2.v16i8
32 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
33 ; CODE-LABEL: trn2.v16i8
34 ; CODE: trn2 v0.16b, v0.16b, v1.16b
35 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3], ..., v0[15], v1[15].
36 %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
; trn1 pattern on <4 x i16>: mask <0, 4, 2, 6> pairs the even-numbered lanes
; of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .4h.
40 ; COST-LABEL: trn1.v4i16
41 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
42 ; CODE-LABEL: trn1.v4i16
43 ; CODE: trn1 v0.4h, v0.4h, v1.4h
44 define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2].
45 %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; trn2 pattern on <4 x i16>: mask <1, 5, 3, 7> pairs the odd-numbered lanes
; of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .4h.
49 ; COST-LABEL: trn2.v4i16
50 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
51 ; CODE-LABEL: trn2.v4i16
52 ; CODE: trn2 v0.4h, v0.4h, v1.4h
53 define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3].
54 %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; trn1 pattern on <8 x i16>: even-numbered lanes of %v0 interleaved with the
; same-numbered lanes of %v1 (mask indices 8..15 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .8h.
58 ; COST-LABEL: trn1.v8i16
59 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
60 ; CODE-LABEL: trn1.v8i16
61 ; CODE: trn1 v0.8h, v0.8h, v1.8h
62 define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2], v0[4], v1[4], v0[6], v1[6].
63 %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; trn2 pattern on <8 x i16>: odd-numbered lanes of %v0 interleaved with the
; same-numbered lanes of %v1 (mask indices 8..15 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .8h.
67 ; COST-LABEL: trn2.v8i16
68 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
69 ; CODE-LABEL: trn2.v8i16
70 ; CODE: trn2 v0.8h, v0.8h, v1.8h
71 define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3], v0[5], v1[5], v0[7], v1[7].
72 %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; Two-lane case on <2 x i32>: mask <0, 2> yields { v0[0], v1[0] }.
; The function keeps the trn1 name, but the expected instruction is zip1
; (with only two lanes, trn1 and zip1 describe the same lane arrangement).
; The expected cost-model estimate is still 1.
76 ; COST-LABEL: trn1.v2i32
77 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
78 ; CODE-LABEL: trn1.v2i32
79 ; CODE: zip1 v0.2s, v0.2s, v1.2s
80 define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
; Mask index 2 is lane 0 of %v1.
81 %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
; Two-lane case on <2 x i32>: mask <1, 3> yields { v0[1], v1[1] }.
; The function keeps the trn2 name, but the expected instruction is zip2
; (with only two lanes, trn2 and zip2 describe the same lane arrangement).
; The expected cost-model estimate is still 1.
85 ; COST-LABEL: trn2.v2i32
86 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
87 ; CODE-LABEL: trn2.v2i32
88 ; CODE: zip2 v0.2s, v0.2s, v1.2s
89 define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
; Mask index 3 is lane 1 of %v1.
90 %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
; trn1 pattern on <4 x i32>: mask <0, 4, 2, 6> pairs the even-numbered lanes
; of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .4s.
94 ; COST-LABEL: trn1.v4i32
95 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
96 ; CODE-LABEL: trn1.v4i32
97 ; CODE: trn1 v0.4s, v0.4s, v1.4s
98 define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2].
99 %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; trn2 pattern on <4 x i32>: mask <1, 5, 3, 7> pairs the odd-numbered lanes
; of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .4s.
103 ; COST-LABEL: trn2.v4i32
104 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
105 ; CODE-LABEL: trn2.v4i32
106 ; CODE: trn2 v0.4s, v0.4s, v1.4s
107 define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3].
108 %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; Two-lane case on <2 x i64>: mask <0, 2> yields { v0[0], v1[0] }.
; The function keeps the trn1 name, but the expected instruction is zip1 on
; .2d (with only two lanes, trn1 and zip1 describe the same arrangement).
; The expected cost-model estimate is still 1.
112 ; COST-LABEL: trn1.v2i64
113 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
114 ; CODE-LABEL: trn1.v2i64
115 ; CODE: zip1 v0.2d, v0.2d, v1.2d
116 define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
; Mask index 2 is lane 0 of %v1.
117 %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
; Two-lane case on <2 x i64>: mask <1, 3> yields { v0[1], v1[1] }.
; The function keeps the trn2 name, but the expected instruction is zip2 on
; .2d (with only two lanes, trn2 and zip2 describe the same arrangement).
; The expected cost-model estimate is still 1.
121 ; COST-LABEL: trn2.v2i64
122 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
123 ; CODE-LABEL: trn2.v2i64
124 ; CODE: zip2 v0.2d, v0.2d, v1.2d
125 define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
; Mask index 3 is lane 1 of %v1.
126 %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
; Floating-point two-lane case on <2 x float>: mask <0, 2> yields
; { v0[0], v1[0] }. As in the two-lane integer cases, the expected
; instruction is zip1 on .2s and the expected cost-model estimate is 1.
130 ; COST-LABEL: trn1.v2f32
131 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
132 ; CODE-LABEL: trn1.v2f32
133 ; CODE: zip1 v0.2s, v0.2s, v1.2s
134 define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
; Mask index 2 is lane 0 of %v1.
135 %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
136 ret <2 x float> %tmp0
; Floating-point two-lane case on <2 x float>: mask <1, 3> yields
; { v0[1], v1[1] }. As in the two-lane integer cases, the expected
; instruction is zip2 on .2s and the expected cost-model estimate is 1.
139 ; COST-LABEL: trn2.v2f32
140 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
141 ; CODE-LABEL: trn2.v2f32
142 ; CODE: zip2 v0.2s, v0.2s, v1.2s
143 define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
; Mask index 3 is lane 1 of %v1.
144 %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
145 ret <2 x float> %tmp0
; trn1 pattern on <4 x float>: mask <0, 4, 2, 6> pairs the even-numbered
; lanes of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn1 on .4s.
148 ; COST-LABEL: trn1.v4f32
149 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
150 ; CODE-LABEL: trn1.v4f32
151 ; CODE: trn1 v0.4s, v0.4s, v1.4s
152 define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
; Result lanes: v0[0], v1[0], v0[2], v1[2].
153 %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
154 ret <4 x float> %tmp0
; trn2 pattern on <4 x float>: mask <1, 5, 3, 7> pairs the odd-numbered
; lanes of %v0 with the same lanes of %v1 (mask indices 4..7 select from %v1).
; The checks expect a cost-model estimate of 1 and a single trn2 on .4s.
157 ; COST-LABEL: trn2.v4f32
158 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
159 ; CODE-LABEL: trn2.v4f32
160 ; CODE: trn2 v0.4s, v0.4s, v1.4s
161 define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
; Result lanes: v0[1], v1[1], v0[3], v1[3].
162 %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
163 ret <4 x float> %tmp0
; Floating-point two-lane case on <2 x double>: mask <0, 2> yields
; { v0[0], v1[0] }. As in the other two-lane cases, the expected
; instruction is zip1 on .2d and the expected cost-model estimate is 1.
166 ; COST-LABEL: trn1.v2f64
167 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
168 ; CODE-LABEL: trn1.v2f64
169 ; CODE: zip1 v0.2d, v0.2d, v1.2d
170 define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
; Mask index 2 is lane 0 of %v1.
171 %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
172 ret <2 x double> %tmp0
; Floating-point two-lane case on <2 x double>: mask <1, 3> yields
; { v0[1], v1[1] }. As in the other two-lane cases, the expected
; instruction is zip2 on .2d and the expected cost-model estimate is 1.
175 ; COST-LABEL: trn2.v2f64
176 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
177 ; CODE-LABEL: trn2.v2f64
178 ; CODE: zip2 v0.2d, v0.2d, v1.2d
179 define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
; Mask index 3 is lane 1 of %v1.
180 %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
181 ret <2 x double> %tmp0