1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; extract_2_v4i16: rebuild an <8 x i8> from two <4 x i16> inputs, one scalar
; lane at a time. Every lane of %a and then of %b is extracted in order,
; truncated to i8, and inserted into result lanes 0-7.
; The autogenerated assertions below expect the whole chain to become a single
; uzp1 on .8b lanes.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i7 is the returned value - confirm.
4 define <8 x i8> @extract_2_v4i16(<4 x i16> %a, <4 x i16> %b) {
5 ; CHECK-LABEL: extract_2_v4i16:
6 ; CHECK: // %bb.0: // %entry
7 ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
; Step 1: pull out lanes 0..3 of %a, then lanes 0..3 of %b (in-order indices).
10 %a0 = extractelement <4 x i16> %a, i32 0
11 %a1 = extractelement <4 x i16> %a, i32 1
12 %a2 = extractelement <4 x i16> %a, i32 2
13 %a3 = extractelement <4 x i16> %a, i32 3
14 %b0 = extractelement <4 x i16> %b, i32 0
15 %b1 = extractelement <4 x i16> %b, i32 1
16 %b2 = extractelement <4 x i16> %b, i32 2
17 %b3 = extractelement <4 x i16> %b, i32 3
; Step 2: narrow each extracted i16 scalar to i8.
18 %t0 = trunc i16 %a0 to i8
19 %t1 = trunc i16 %a1 to i8
20 %t2 = trunc i16 %a2 to i8
21 %t3 = trunc i16 %a3 to i8
22 %t4 = trunc i16 %b0 to i8
23 %t5 = trunc i16 %b1 to i8
24 %t6 = trunc i16 %b2 to i8
25 %t7 = trunc i16 %b3 to i8
; Step 3: assemble the result starting from undef; %a fills lanes 0-3 and
; %b fills lanes 4-7.
26 %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
27 %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
28 %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
29 %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
30 %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
31 %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
32 %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
33 %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
; extract_2_v4i32: same shape as the two-input i16 case, but the sources are
; <4 x i32>. Eight i32 lanes (four from %a, four from %b) are truncated to i8
; and inserted into an <8 x i8>.
; The autogenerated assertions below record the current codegen: a sequence of
; per-lane moves through w8/w9/w10 into v0.b[1..7], not a single vector
; instruction.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i7 is the returned value - confirm.
37 define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
38 ; CHECK-LABEL: extract_2_v4i32:
39 ; CHECK: // %bb.0: // %entry
40 ; CHECK-NEXT: mov w8, v0.s[1]
41 ; CHECK-NEXT: mov w9, v0.s[2]
42 ; CHECK-NEXT: mov w10, v0.s[3]
43 ; CHECK-NEXT: mov v0.b[1], w8
44 ; CHECK-NEXT: mov w8, v1.s[1]
45 ; CHECK-NEXT: mov v0.b[2], w9
46 ; CHECK-NEXT: mov w9, v1.s[2]
47 ; CHECK-NEXT: mov v0.b[3], w10
48 ; CHECK-NEXT: mov v0.b[4], v1.b[0]
49 ; CHECK-NEXT: mov v0.b[5], w8
50 ; CHECK-NEXT: mov w8, v1.s[3]
51 ; CHECK-NEXT: mov v0.b[6], w9
52 ; CHECK-NEXT: mov v0.b[7], w8
53 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; Step 1: pull out lanes 0..3 of %a, then lanes 0..3 of %b (in-order indices).
56 %a0 = extractelement <4 x i32> %a, i32 0
57 %a1 = extractelement <4 x i32> %a, i32 1
58 %a2 = extractelement <4 x i32> %a, i32 2
59 %a3 = extractelement <4 x i32> %a, i32 3
60 %b0 = extractelement <4 x i32> %b, i32 0
61 %b1 = extractelement <4 x i32> %b, i32 1
62 %b2 = extractelement <4 x i32> %b, i32 2
63 %b3 = extractelement <4 x i32> %b, i32 3
; Step 2: narrow each extracted i32 scalar straight to i8.
64 %t0 = trunc i32 %a0 to i8
65 %t1 = trunc i32 %a1 to i8
66 %t2 = trunc i32 %a2 to i8
67 %t3 = trunc i32 %a3 to i8
68 %t4 = trunc i32 %b0 to i8
69 %t5 = trunc i32 %b1 to i8
70 %t6 = trunc i32 %b2 to i8
71 %t7 = trunc i32 %b3 to i8
; Step 3: assemble the result starting from undef; %a fills lanes 0-3 and
; %b fills lanes 4-7.
72 %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
73 %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
74 %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
75 %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
76 %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
77 %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
78 %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
79 %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
; extract_4_v4i16: rebuild a <16 x i8> from four <4 x i16> inputs. All sixteen
; i16 lanes (%a, %b, %c, %d in that order, lane indices ascending) are
; truncated to i8 and inserted into result lanes 0-15.
; The autogenerated assertions below expect two d[1] lane moves that pair up
; the inputs (v1 into v0, v3 into v2) followed by one uzp1 on .16b lanes.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i15 is the returned value - confirm.
83 define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
84 ; CHECK-LABEL: extract_4_v4i16:
85 ; CHECK: // %bb.0: // %entry
86 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
87 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
88 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
89 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
90 ; CHECK-NEXT: mov v2.d[1], v3.d[0]
91 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
92 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; Step 1: pull out lanes 0..3 of each input in the order %a, %b, %c, %d.
95 %a0 = extractelement <4 x i16> %a, i32 0
96 %a1 = extractelement <4 x i16> %a, i32 1
97 %a2 = extractelement <4 x i16> %a, i32 2
98 %a3 = extractelement <4 x i16> %a, i32 3
99 %b0 = extractelement <4 x i16> %b, i32 0
100 %b1 = extractelement <4 x i16> %b, i32 1
101 %b2 = extractelement <4 x i16> %b, i32 2
102 %b3 = extractelement <4 x i16> %b, i32 3
103 %c0 = extractelement <4 x i16> %c, i32 0
104 %c1 = extractelement <4 x i16> %c, i32 1
105 %c2 = extractelement <4 x i16> %c, i32 2
106 %c3 = extractelement <4 x i16> %c, i32 3
107 %d0 = extractelement <4 x i16> %d, i32 0
108 %d1 = extractelement <4 x i16> %d, i32 1
109 %d2 = extractelement <4 x i16> %d, i32 2
110 %d3 = extractelement <4 x i16> %d, i32 3
; Step 2: narrow each extracted i16 scalar to i8.
111 %t0 = trunc i16 %a0 to i8
112 %t1 = trunc i16 %a1 to i8
113 %t2 = trunc i16 %a2 to i8
114 %t3 = trunc i16 %a3 to i8
115 %t4 = trunc i16 %b0 to i8
116 %t5 = trunc i16 %b1 to i8
117 %t6 = trunc i16 %b2 to i8
118 %t7 = trunc i16 %b3 to i8
119 %t8 = trunc i16 %c0 to i8
120 %t9 = trunc i16 %c1 to i8
121 %t10 = trunc i16 %c2 to i8
122 %t11 = trunc i16 %c3 to i8
123 %t12 = trunc i16 %d0 to i8
124 %t13 = trunc i16 %d1 to i8
125 %t14 = trunc i16 %d2 to i8
126 %t15 = trunc i16 %d3 to i8
; Step 3: assemble the result starting from undef; each input fills four
; consecutive lanes (%a: 0-3, %b: 4-7, %c: 8-11, %d: 12-15).
127 %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
128 %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
129 %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
130 %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
131 %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
132 %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
133 %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
134 %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
135 %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
136 %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
137 %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
138 %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
139 %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
140 %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
141 %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
142 %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
; extract_4_v4i32: rebuild a <16 x i8> from four <4 x i32> inputs. All sixteen
; i32 lanes (%a, %b, %c, %d in that order, lane indices ascending) are
; truncated to i8 and inserted into result lanes 0-15.
; The autogenerated assertions below expect three uzp1 instructions: two on .8h
; lanes combining (v0, v1) and (v2, v3), then one on .16b lanes combining the
; two intermediate results.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i15 is the returned value - confirm.
146 define <16 x i8> @extract_4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
147 ; CHECK-LABEL: extract_4_v4i32:
148 ; CHECK: // %bb.0: // %entry
149 ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
150 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
151 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; Step 1: pull out lanes 0..3 of each input in the order %a, %b, %c, %d.
154 %a0 = extractelement <4 x i32> %a, i32 0
155 %a1 = extractelement <4 x i32> %a, i32 1
156 %a2 = extractelement <4 x i32> %a, i32 2
157 %a3 = extractelement <4 x i32> %a, i32 3
158 %b0 = extractelement <4 x i32> %b, i32 0
159 %b1 = extractelement <4 x i32> %b, i32 1
160 %b2 = extractelement <4 x i32> %b, i32 2
161 %b3 = extractelement <4 x i32> %b, i32 3
162 %c0 = extractelement <4 x i32> %c, i32 0
163 %c1 = extractelement <4 x i32> %c, i32 1
164 %c2 = extractelement <4 x i32> %c, i32 2
165 %c3 = extractelement <4 x i32> %c, i32 3
166 %d0 = extractelement <4 x i32> %d, i32 0
167 %d1 = extractelement <4 x i32> %d, i32 1
168 %d2 = extractelement <4 x i32> %d, i32 2
169 %d3 = extractelement <4 x i32> %d, i32 3
; Step 2: narrow each extracted i32 scalar straight to i8.
170 %t0 = trunc i32 %a0 to i8
171 %t1 = trunc i32 %a1 to i8
172 %t2 = trunc i32 %a2 to i8
173 %t3 = trunc i32 %a3 to i8
174 %t4 = trunc i32 %b0 to i8
175 %t5 = trunc i32 %b1 to i8
176 %t6 = trunc i32 %b2 to i8
177 %t7 = trunc i32 %b3 to i8
178 %t8 = trunc i32 %c0 to i8
179 %t9 = trunc i32 %c1 to i8
180 %t10 = trunc i32 %c2 to i8
181 %t11 = trunc i32 %c3 to i8
182 %t12 = trunc i32 %d0 to i8
183 %t13 = trunc i32 %d1 to i8
184 %t14 = trunc i32 %d2 to i8
185 %t15 = trunc i32 %d3 to i8
; Step 3: assemble the result starting from undef; each input fills four
; consecutive lanes (%a: 0-3, %b: 4-7, %c: 8-11, %d: 12-15).
186 %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
187 %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
188 %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
189 %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
190 %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
191 %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
192 %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
193 %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
194 %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
195 %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
196 %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
197 %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
198 %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
199 %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
200 %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
201 %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
; extract_4_mixed: same sixteen-lane rebuild, but with mixed source element
; widths: %a and %d are <4 x i16>, %b and %c are <4 x i32>. Lanes are taken in
; the order %a, %b, %c, %d and truncated to i8 from their own width.
; The autogenerated assertions below expect the i32 inputs to be narrowed with
; xtn / xtn2, one d[1] lane move to attach v3 to v2, and a final uzp1 on .16b
; lanes.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i15 is the returned value - confirm.
205 define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x i16> %d) {
206 ; CHECK-LABEL: extract_4_mixed:
207 ; CHECK: // %bb.0: // %entry
208 ; CHECK-NEXT: xtn v2.4h, v2.4s
209 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
210 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
211 ; CHECK-NEXT: xtn2 v0.8h, v1.4s
212 ; CHECK-NEXT: mov v2.d[1], v3.d[0]
213 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; Step 1: pull out lanes 0..3 of each input; note %a/%d are i16 vectors while
; %b/%c are i32 vectors.
216 %a0 = extractelement <4 x i16> %a, i32 0
217 %a1 = extractelement <4 x i16> %a, i32 1
218 %a2 = extractelement <4 x i16> %a, i32 2
219 %a3 = extractelement <4 x i16> %a, i32 3
220 %b0 = extractelement <4 x i32> %b, i32 0
221 %b1 = extractelement <4 x i32> %b, i32 1
222 %b2 = extractelement <4 x i32> %b, i32 2
223 %b3 = extractelement <4 x i32> %b, i32 3
224 %c0 = extractelement <4 x i32> %c, i32 0
225 %c1 = extractelement <4 x i32> %c, i32 1
226 %c2 = extractelement <4 x i32> %c, i32 2
227 %c3 = extractelement <4 x i32> %c, i32 3
228 %d0 = extractelement <4 x i16> %d, i32 0
229 %d1 = extractelement <4 x i16> %d, i32 1
230 %d2 = extractelement <4 x i16> %d, i32 2
231 %d3 = extractelement <4 x i16> %d, i32 3
; Step 2: narrow every scalar to i8; the source width of each trunc matches
; the vector it was extracted from (i16 for %a/%d, i32 for %b/%c).
232 %t0 = trunc i16 %a0 to i8
233 %t1 = trunc i16 %a1 to i8
234 %t2 = trunc i16 %a2 to i8
235 %t3 = trunc i16 %a3 to i8
236 %t4 = trunc i32 %b0 to i8
237 %t5 = trunc i32 %b1 to i8
238 %t6 = trunc i32 %b2 to i8
239 %t7 = trunc i32 %b3 to i8
240 %t8 = trunc i32 %c0 to i8
241 %t9 = trunc i32 %c1 to i8
242 %t10 = trunc i32 %c2 to i8
243 %t11 = trunc i32 %c3 to i8
244 %t12 = trunc i16 %d0 to i8
245 %t13 = trunc i16 %d1 to i8
246 %t14 = trunc i16 %d2 to i8
247 %t15 = trunc i16 %d3 to i8
; Step 3: assemble the result starting from undef; each input fills four
; consecutive lanes (%a: 0-3, %b: 4-7, %c: 8-11, %d: 12-15).
248 %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
249 %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
250 %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
251 %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
252 %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
253 %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
254 %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
255 %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
256 %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
257 %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
258 %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
259 %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
260 %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
261 %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
262 %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
263 %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
; extract_4_v4i32_badindex: variant of the four-input <4 x i32> case in which
; the lane order is deliberately broken for %b: %b1 reads lane 2 and %b2 reads
; lane 1, so the extracted lanes are no longer in ascending order.
; The autogenerated assertions below expect a different lowering from the
; in-order case: an index vector is loaded from the constant pool (.LCPI5_0)
; into q4 and a four-register tbl produces the result.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i15 is the returned value - confirm.
267 define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
268 ; CHECK-LABEL: extract_4_v4i32_badindex:
269 ; CHECK: // %bb.0: // %entry
270 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
271 ; CHECK-NEXT: adrp x8, .LCPI5_0
272 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
273 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0]
274 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
275 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
276 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; Step 1: pull out four lanes of each input. %a, %c and %d use indices 0..3 in
; order; %b uses 0, 2, 1, 3 - the swapped middle pair is the point of this
; test.
279 %a0 = extractelement <4 x i32> %a, i32 0
280 %a1 = extractelement <4 x i32> %a, i32 1
281 %a2 = extractelement <4 x i32> %a, i32 2
282 %a3 = extractelement <4 x i32> %a, i32 3
283 %b0 = extractelement <4 x i32> %b, i32 0
284 %b1 = extractelement <4 x i32> %b, i32 2
285 %b2 = extractelement <4 x i32> %b, i32 1
286 %b3 = extractelement <4 x i32> %b, i32 3
287 %c0 = extractelement <4 x i32> %c, i32 0
288 %c1 = extractelement <4 x i32> %c, i32 1
289 %c2 = extractelement <4 x i32> %c, i32 2
290 %c3 = extractelement <4 x i32> %c, i32 3
291 %d0 = extractelement <4 x i32> %d, i32 0
292 %d1 = extractelement <4 x i32> %d, i32 1
293 %d2 = extractelement <4 x i32> %d, i32 2
294 %d3 = extractelement <4 x i32> %d, i32 3
; Step 2: narrow each extracted i32 scalar straight to i8.
295 %t0 = trunc i32 %a0 to i8
296 %t1 = trunc i32 %a1 to i8
297 %t2 = trunc i32 %a2 to i8
298 %t3 = trunc i32 %a3 to i8
299 %t4 = trunc i32 %b0 to i8
300 %t5 = trunc i32 %b1 to i8
301 %t6 = trunc i32 %b2 to i8
302 %t7 = trunc i32 %b3 to i8
303 %t8 = trunc i32 %c0 to i8
304 %t9 = trunc i32 %c1 to i8
305 %t10 = trunc i32 %c2 to i8
306 %t11 = trunc i32 %c3 to i8
307 %t12 = trunc i32 %d0 to i8
308 %t13 = trunc i32 %d1 to i8
309 %t14 = trunc i32 %d2 to i8
310 %t15 = trunc i32 %d3 to i8
; Step 3: assemble the result starting from undef; each input fills four
; consecutive lanes (%a: 0-3, %b: 4-7, %c: 8-11, %d: 12-15).
311 %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
312 %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
313 %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
314 %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
315 %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
316 %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
317 %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
318 %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
319 %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
320 %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
321 %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
322 %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
323 %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
324 %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
325 %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
326 %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
; extract_4_v4i32_one: sixteen-lane rebuild from a single <4 x i32> input. The
; four lanes of %a are extracted and truncated once, and the same four i8
; values are then inserted four times over (lanes 0-3, 4-7, 8-11, 12-15).
; The autogenerated assertions below expect two uzp1 instructions that use v0
; for both source operands: first on .8h lanes, then on .16b lanes.
; NOTE(review): the block label and the terminating ret are not visible in
; this view; presumably %i15 is the returned value - confirm.
330 define <16 x i8> @extract_4_v4i32_one(<4 x i32> %a) {
331 ; CHECK-LABEL: extract_4_v4i32_one:
332 ; CHECK: // %bb.0: // %entry
333 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
334 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v0.16b
; Step 1: pull out lanes 0..3 of the only input.
337 %a0 = extractelement <4 x i32> %a, i32 0
338 %a1 = extractelement <4 x i32> %a, i32 1
339 %a2 = extractelement <4 x i32> %a, i32 2
340 %a3 = extractelement <4 x i32> %a, i32 3
; Step 2: narrow each extracted i32 scalar straight to i8.
341 %t0 = trunc i32 %a0 to i8
342 %t1 = trunc i32 %a1 to i8
343 %t2 = trunc i32 %a2 to i8
344 %t3 = trunc i32 %a3 to i8
; Step 3: assemble the result starting from undef, reusing %t0..%t3 for every
; group of four lanes.
345 %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
346 %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
347 %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
348 %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
349 %i4 = insertelement <16 x i8> %i3, i8 %t0, i32 4
350 %i5 = insertelement <16 x i8> %i4, i8 %t1, i32 5
351 %i6 = insertelement <16 x i8> %i5, i8 %t2, i32 6
352 %i7 = insertelement <16 x i8> %i6, i8 %t3, i32 7
353 %i8 = insertelement <16 x i8> %i7, i8 %t0, i32 8
354 %i9 = insertelement <16 x i8> %i8, i8 %t1, i32 9
355 %i10 = insertelement <16 x i8> %i9, i8 %t2, i32 10
356 %i11 = insertelement <16 x i8> %i10, i8 %t3, i32 11
357 %i12 = insertelement <16 x i8> %i11, i8 %t0, i32 12
358 %i13 = insertelement <16 x i8> %i12, i8 %t1, i32 13
359 %i14 = insertelement <16 x i8> %i13, i8 %t2, i32 14
360 %i15 = insertelement <16 x i8> %i14, i8 %t3, i32 15