1 ; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
5 ; NOTE: Due to their nature, the expected inserts and extracts often emit no
6 ; instructions, so these tests verify the output of DAGCombiner directly.
8 target triple = "aarch64-unknown-linux-gnu"
10 ; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
11 ; CHECK: SelectionDAG has 10 nodes:
12 ; CHECK: t0: ch,glue = EntryToken
13 ; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
14 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
15 ; CHECK: t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
16 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
17 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
19 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
20 ; CHECK: SelectionDAG has 9 nodes:
21 ; CHECK: t0: ch,glue = EntryToken
22 ; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
23 ; CHECK: t10: v16i8 = insert_subvector undef:v16i8, t2, Constant:i64<0>
24 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
25 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
; Extracts a <4 x i8> from index 0 of the <8 x i8> argument and inserts it at
; index 0 of an undef <16 x i8>. The checks above expect the extract/insert
; pair to combine into a single v16i8 insert_subvector of the whole v8i8 value.
27 define <16 x i8> @insert_small_fixed_into_big_fixed(<8 x i8> %a) #0 {
28 %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
29 %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
33 ; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
34 ; CHECK: SelectionDAG has 10 nodes:
35 ; CHECK: t0: ch,glue = EntryToken
36 ; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
37 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
38 ; CHECK: t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
39 ; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
40 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
42 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
43 ; CHECK: SelectionDAG has 9 nodes:
44 ; CHECK: t0: ch,glue = EntryToken
45 ; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
46 ; CHECK: t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
47 ; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
48 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
; Extracts a <4 x i8> from index 0 of the <8 x i8> argument and inserts it at
; index 0 of an undef <vscale x 16 x i8>. The checks above expect a single
; nxv16i8 insert_subvector of the whole v8i8 value after combining.
50 define <vscale x 16 x i8> @insert_small_fixed_into_big_scalable(<8 x i8> %a) #0 {
51 %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
52 %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
53 ret <vscale x 16 x i8> %insert
56 ; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
57 ; CHECK: SelectionDAG has 11 nodes:
58 ; CHECK: t0: ch,glue = EntryToken
59 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
60 ; CHECK: t3: nxv8i8 = truncate t2
61 ; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
62 ; CHECK: t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
63 ; CHECK: t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
64 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1
66 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
67 ; CHECK: SelectionDAG has 11 nodes:
68 ; CHECK: t0: ch,glue = EntryToken
69 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
70 ; CHECK: t3: nxv8i8 = truncate t2
71 ; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
72 ; CHECK: t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
73 ; CHECK: t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
74 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1
76 ; Resulting insert would not be legal, so there's no transformation.
; (Combining would mean inserting the scalable <vscale x 8 x i8> value into a
; fixed-length <16 x i8>; the checks above expect the optimized DAG to keep
; both the extract_subvector and the insert_subvector unchanged.)
77 define <16 x i8> @insert_small_scalable_into_big_fixed(<vscale x 8 x i8> %a) #0 {
78 %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
79 %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
83 ; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
84 ; CHECK: SelectionDAG has 11 nodes:
85 ; CHECK: t0: ch,glue = EntryToken
86 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
87 ; CHECK: t3: nxv8i8 = truncate t2
88 ; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
89 ; CHECK: t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
90 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
91 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
93 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
94 ; CHECK: SelectionDAG has 10 nodes:
95 ; CHECK: t0: ch,glue = EntryToken
96 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
97 ; CHECK: t3: nxv8i8 = truncate t2
98 ; CHECK: t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
99 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
100 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
; Extracts a fixed <4 x i8> from index 0 of the <vscale x 8 x i8> argument and
; inserts it at index 0 of an undef <vscale x 16 x i8>. The checks above expect
; a single nxv16i8 insert_subvector of the whole nxv8i8 value after combining.
102 define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_1(<vscale x 8 x i8> %a) #0 {
103 %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
104 %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
105 ret <vscale x 16 x i8> %insert
108 ; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
109 ; CHECK: SelectionDAG has 11 nodes:
110 ; CHECK: t0: ch,glue = EntryToken
111 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
112 ; CHECK: t3: nxv8i8 = truncate t2
113 ; CHECK: t5: nxv4i8 = extract_subvector t3, Constant:i64<0>
114 ; CHECK: t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
115 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
116 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
118 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
119 ; CHECK: SelectionDAG has 10 nodes:
120 ; CHECK: t0: ch,glue = EntryToken
121 ; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
122 ; CHECK: t3: nxv8i8 = truncate t2
123 ; CHECK: t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
124 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
125 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
; Same shape as insert_small_scalable_into_big_scalable_1, but the intermediate
; subvector is the scalable <vscale x 4 x i8> rather than a fixed <4 x i8>. The
; checks above expect a single nxv16i8 insert_subvector of the nxv8i8 value.
127 define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_2(<vscale x 8 x i8> %a) #0 {
128 %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
129 %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
130 ret <vscale x 16 x i8> %insert
133 ; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
134 ; CHECK: SelectionDAG has 10 nodes:
135 ; CHECK: t0: ch,glue = EntryToken
136 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
137 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
138 ; CHECK: t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
139 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
140 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
142 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
143 ; CHECK: SelectionDAG has 8 nodes:
144 ; CHECK: t0: ch,glue = EntryToken
145 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
146 ; CHECK: t10: v8i8 = extract_subvector t2, Constant:i64<0>
147 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
148 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
; Extracts a <4 x i8> from index 0 of the <16 x i8> argument and inserts it at
; index 0 of an undef <8 x i8>. The checks above expect the pair to combine
; into a single v8i8 extract_subvector of the v16i8 value.
150 define <8 x i8> @extract_small_fixed_from_big_fixed(<16 x i8> %a) #0 {
151 %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
152 %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
156 ; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
157 ; CHECK: SelectionDAG has 11 nodes:
158 ; CHECK: t0: ch,glue = EntryToken
159 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
160 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
161 ; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
162 ; CHECK: t7: nxv8i16 = any_extend t6
163 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
164 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
166 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
167 ; CHECK: SelectionDAG has 11 nodes:
168 ; CHECK: t0: ch,glue = EntryToken
169 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
170 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
171 ; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
172 ; CHECK: t7: nxv8i16 = any_extend t6
173 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
174 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
176 ; Resulting extract would not be legal, so there's no transformation.
; (Combining would mean extracting a scalable <vscale x 8 x i8> from the
; fixed-length <16 x i8> argument; the checks above expect the optimized DAG to
; keep both the extract_subvector and the insert_subvector unchanged.)
177 define <vscale x 8 x i8> @extract_small_scalable_from_big_fixed(<16 x i8> %a) #0 {
178 %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
179 %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
180 ret <vscale x 8 x i8> %insert
183 ; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
184 ; CHECK: SelectionDAG has 10 nodes:
185 ; CHECK: t0: ch,glue = EntryToken
186 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
187 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
188 ; CHECK: t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
189 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
190 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
192 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
193 ; CHECK: SelectionDAG has 8 nodes:
194 ; CHECK: t0: ch,glue = EntryToken
195 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
196 ; CHECK: t10: v8i8 = extract_subvector t2, Constant:i64<0>
197 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
198 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
; Extracts a <4 x i8> from index 0 of the <vscale x 16 x i8> argument and
; inserts it at index 0 of an undef <8 x i8>. The checks above expect a single
; v8i8 extract_subvector of the nxv16i8 value after combining.
200 define <8 x i8> @extract_small_fixed_from_big_scalable(<vscale x 16 x i8> %a) #0 {
201 %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
202 %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
206 ; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
207 ; CHECK: SelectionDAG has 11 nodes:
208 ; CHECK: t0: ch,glue = EntryToken
209 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
210 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
211 ; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
212 ; CHECK: t7: nxv8i16 = any_extend t6
213 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
214 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
216 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
217 ; CHECK: SelectionDAG has 9 nodes:
218 ; CHECK: t0: ch,glue = EntryToken
219 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
220 ; CHECK: t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
221 ; CHECK: t7: nxv8i16 = any_extend t11
222 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
223 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
; Extracts a fixed <4 x i8> from index 0 of the <vscale x 16 x i8> argument and
; inserts it at index 0 of an undef <vscale x 8 x i8>. The checks above expect
; a single nxv8i8 extract_subvector of the nxv16i8 value, which is then
; any_extended to nxv8i16 for the return register.
225 define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_1(<vscale x 16 x i8> %a) #0 {
226 %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
227 %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
228 ret <vscale x 8 x i8> %insert
231 ; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
232 ; CHECK: SelectionDAG has 11 nodes:
233 ; CHECK: t0: ch,glue = EntryToken
234 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
235 ; CHECK: t4: nxv4i8 = extract_subvector t2, Constant:i64<0>
236 ; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
237 ; CHECK: t7: nxv8i16 = any_extend t6
238 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
239 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
241 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
242 ; CHECK: SelectionDAG has 9 nodes:
243 ; CHECK: t0: ch,glue = EntryToken
244 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
245 ; CHECK: t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
246 ; CHECK: t7: nxv8i16 = any_extend t11
247 ; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
248 ; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
; Same shape as extract_small_scalable_from_big_scalable_1, but the
; intermediate subvector is the scalable <vscale x 4 x i8> rather than a fixed
; <4 x i8>. The checks above expect a single nxv8i8 extract_subvector of the
; nxv16i8 value, followed by the any_extend to nxv8i16.
250 define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_2(<vscale x 16 x i8> %a) #0 {
251 %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
252 %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
253 ret <vscale x 8 x i8> %insert
256 ; CHECK: Initial selection DAG: %bb.0 'extract_fixed_from_scalable:'
257 ; CHECK: SelectionDAG has 10 nodes:
258 ; CHECK: t0: ch,glue = EntryToken
259 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
260 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
261 ; CHECK: t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
262 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
263 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
265 ; CHECK: Optimized lowered selection DAG: %bb.0 'extract_fixed_from_scalable:'
266 ; CHECK: SelectionDAG has 8 nodes:
267 ; CHECK: t0: ch,glue = EntryToken
268 ; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
269 ; CHECK: t10: v16i8 = extract_subvector t2, Constant:i64<0>
270 ; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
271 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
273 ; A variant of insert_small_scalable_into_big_fixed whose vector types prevent
274 ; the expected transformation because the resulting insert would not be legal.
275 ; In this instance their matching minimum vector lengths allow us to perform the
276 ; opposite transformation and emit an extract instead.
; The checks above expect a single v16i8 extract_subvector of the nxv16i8
; argument in the optimized DAG.
277 define <16 x i8> @extract_fixed_from_scalable(<vscale x 16 x i8> %a) #0 {
278 %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
279 %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
280 ret <16 x i8> %insert
283 ; CHECK: Initial selection DAG: %bb.0 'insert_fixed_into_scalable:'
284 ; CHECK: SelectionDAG has 10 nodes:
285 ; CHECK: t0: ch,glue = EntryToken
286 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
287 ; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
288 ; CHECK: t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
289 ; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
290 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
292 ; CHECK: Optimized lowered selection DAG: %bb.0 'insert_fixed_into_scalable:'
293 ; CHECK: SelectionDAG has 9 nodes:
294 ; CHECK: t0: ch,glue = EntryToken
295 ; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
296 ; CHECK: t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
297 ; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
298 ; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
300 ; A variant of extract_small_scalable_from_big_fixed whose vector types prevent
301 ; the expected transformation because the resulting extract would not be legal.
302 ; In this instance their matching minimum vector lengths allow us to perform the
303 ; opposite transformation and emit an insert instead.
; The checks above expect a single nxv16i8 insert_subvector of the v16i8
; argument in the optimized DAG.
304 define <vscale x 16 x i8> @insert_fixed_into_scalable(<16 x i8> %a) #0 {
305 %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
306 %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
307 ret <vscale x 16 x i8> %insert
310 attributes #0 = { "target-features"="+sve" }