1 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6 ; COST-LABEL: trn1.v8i8
7 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
8 ; CODE-LABEL: trn1.v8i8
9 ; CODE: trn1 v0.8b, v0.8b, v1.8b
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 8 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i8> %tmp0
}
15 ; COST-LABEL: trn2.v8i8
16 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
17 ; CODE-LABEL: trn2.v8i8
18 ; CODE: trn2 v0.8b, v0.8b, v1.8b
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 8 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i8> %tmp0
}
24 ; COST-LABEL: trn1.v16i8
25 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
26 ; CODE-LABEL: trn1.v16i8
27 ; CODE: trn1 v0.16b, v0.16b, v1.16b
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 16 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %tmp0
}
33 ; COST-LABEL: trn2.v16i8
34 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
35 ; CODE-LABEL: trn2.v16i8
36 ; CODE: trn2 v0.16b, v0.16b, v1.16b
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 16 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <16 x i8> %tmp0
}
42 ; COST-LABEL: trn1.v4i16
43 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
44 ; CODE-LABEL: trn1.v4i16
45 ; CODE: trn1 v0.4h, v0.4h, v1.4h
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i16> %tmp0
}
51 ; COST-LABEL: trn2.v4i16
52 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
53 ; CODE-LABEL: trn2.v4i16
54 ; CODE: trn2 v0.4h, v0.4h, v1.4h
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i16> %tmp0
}
60 ; COST-LABEL: trn1.v8i16
61 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
62 ; CODE-LABEL: trn1.v8i16
63 ; CODE: trn1 v0.8h, v0.8h, v1.8h
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 8 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %tmp0
}
69 ; COST-LABEL: trn2.v8i16
70 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
71 ; CODE-LABEL: trn2.v8i16
72 ; CODE: trn2 v0.8h, v0.8h, v1.8h
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 8 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i16> %tmp0
}
78 ; COST-LABEL: trn1.v2i32
79 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
80 ; CODE-LABEL: trn1.v2i32
81 ; CODE: zip1 v0.2s, v0.2s, v1.2s
; Takes lane 0 of %v0 followed by lane 0 of %v1 (mask index 2). With only two
; lanes this transpose mask is also the ZIP1 mask, which is the instruction the
; checks above expect.
define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %tmp0
}
87 ; COST-LABEL: trn2.v2i32
88 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
89 ; CODE-LABEL: trn2.v2i32
90 ; CODE: zip2 v0.2s, v0.2s, v1.2s
; Takes lane 1 of %v0 followed by lane 1 of %v1 (mask index 3). With only two
; lanes this transpose mask is also the ZIP2 mask, which is the instruction the
; checks above expect.
define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i32> %tmp0
}
96 ; COST-LABEL: trn1.v4i32
97 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
98 ; CODE-LABEL: trn1.v4i32
99 ; CODE: trn1 v0.4s, v0.4s, v1.4s
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %tmp0
}
105 ; COST-LABEL: trn2.v4i32
106 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
107 ; CODE-LABEL: trn2.v4i32
108 ; CODE: trn2 v0.4s, v0.4s, v1.4s
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i32> %tmp0
}
114 ; COST-LABEL: trn1.v2i64
115 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
116 ; CODE-LABEL: trn1.v2i64
117 ; CODE: zip1 v0.2d, v0.2d, v1.2d
; Takes lane 0 of %v0 followed by lane 0 of %v1 (mask index 2). With only two
; lanes this transpose mask is also the ZIP1 mask, which is the instruction the
; checks above expect.
define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %tmp0
}
123 ; COST-LABEL: trn2.v2i64
124 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
125 ; CODE-LABEL: trn2.v2i64
126 ; CODE: zip2 v0.2d, v0.2d, v1.2d
; Takes lane 1 of %v0 followed by lane 1 of %v1 (mask index 3). With only two
; lanes this transpose mask is also the ZIP2 mask, which is the instruction the
; checks above expect.
define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %tmp0
}
132 ; COST-LABEL: trn1.v2f32
133 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
134 ; CODE-LABEL: trn1.v2f32
135 ; CODE: zip1 v0.2s, v0.2s, v1.2s
; Takes lane 0 of %v0 followed by lane 0 of %v1 (mask index 2). With only two
; lanes this transpose mask is also the ZIP1 mask, which is the instruction the
; checks above expect.
define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x float> %tmp0
}
141 ; COST-LABEL: trn2.v2f32
142 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
143 ; CODE-LABEL: trn2.v2f32
144 ; CODE: zip2 v0.2s, v0.2s, v1.2s
; Takes lane 1 of %v0 followed by lane 1 of %v1 (mask index 3). With only two
; lanes this transpose mask is also the ZIP2 mask, which is the instruction the
; checks above expect.
define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %tmp0
}
150 ; COST-LABEL: trn1.v4f32
151 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
152 ; CODE-LABEL: trn1.v4f32
153 ; CODE: trn1 v0.4s, v0.4s, v1.4s
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x float> %tmp0
}
159 ; COST-LABEL: trn2.v4f32
160 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
161 ; CODE-LABEL: trn2.v4f32
162 ; CODE: trn2 v0.4s, v0.4s, v1.4s
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x float> %tmp0
}
168 ; COST-LABEL: trn1.v2f64
169 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
170 ; CODE-LABEL: trn1.v2f64
171 ; CODE: zip1 v0.2d, v0.2d, v1.2d
; Takes lane 0 of %v0 followed by lane 0 of %v1 (mask index 2). With only two
; lanes this transpose mask is also the ZIP1 mask, which is the instruction the
; checks above expect.
define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %tmp0
}
177 ; COST-LABEL: trn2.v2f64
178 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
179 ; CODE-LABEL: trn2.v2f64
180 ; CODE: zip2 v0.2d, v0.2d, v1.2d
; Takes lane 1 of %v0 followed by lane 1 of %v1 (mask index 3). With only two
; lanes this transpose mask is also the ZIP2 mask, which is the instruction the
; checks above expect.
define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %tmp0
}
186 ; COST-LABEL: trn1.v4f16
187 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
188 ; CODE-LABEL: trn1.v4f16
189 ; CODE: trn1 v0.4h, v0.4h, v1.4h
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x half> %tmp0
}
195 ; COST-LABEL: trn2.v4f16
196 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
197 ; CODE-LABEL: trn2.v4f16
198 ; CODE: trn2 v0.4h, v0.4h, v1.4h
; Alternates the odd-numbered lanes of %v0 and %v1 (mask indices 4 and above
; address %v1), which is the TRN2 pattern the checks above expect.
define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x half> %tmp0
}
204 ; COST-LABEL: trn1.v8f16
205 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
206 ; CODE-LABEL: trn1.v8f16
207 ; CODE: trn1 v0.8h, v0.8h, v1.8h
; Alternates the even-numbered lanes of %v0 and %v1 (mask indices 8 and above
; address %v1), which is the TRN1 pattern the checks above expect.
define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x half> %tmp0
}
213 ; COST-LABEL: trn2.v8f16
214 ; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
215 ; CODE-LABEL: trn2.v8f16
216 ; CODE: trn2 v0.8h, v0.8h, v1.8h
217 define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
218 %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>