1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; Test efficient codegen of vector extends from a legal type up to 128-bit
6 ; and 256-bit vector types (a few wider results, e.g. <8 x i64>, are covered too).
8 ; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
9 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1
; Zero-extend <8 x i8> to <8 x i16>. Both RUN lines share the CHECK prefix
; here and are expected to emit ushll.8h.
15 define <8 x i16> @func1(<8 x i8> %v0) nounwind {
18 ; CHECK-NEXT: ushll.8h v0, v0, #0
20 %r = zext <8 x i8> %v0 to <8 x i16>
; Sign-extend <8 x i8> to <8 x i16>. Both RUN lines share the CHECK prefix
; here and are expected to emit sshll.8h.
24 define <8 x i16> @func2(<8 x i8> %v0) nounwind {
27 ; CHECK-NEXT: sshll.8h v0, v0, #0
29 %r = sext <8 x i8> %v0 to <8 x i16>
; Zero-extend <16 x i8> to <16 x i16>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
33 define <16 x i16> @func3(<16 x i8> %v0) nounwind {
34 ; CHECK-SD-LABEL: func3:
36 ; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
37 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
40 ; CHECK-GI-LABEL: func3:
42 ; CHECK-GI-NEXT: ushll.8h v2, v0, #0
43 ; CHECK-GI-NEXT: ushll2.8h v1, v0, #0
44 ; CHECK-GI-NEXT: mov.16b v0, v2
46 %r = zext <16 x i8> %v0 to <16 x i16>
; Sign-extend <16 x i8> to <16 x i16>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
50 define <16 x i16> @func4(<16 x i8> %v0) nounwind {
51 ; CHECK-SD-LABEL: func4:
53 ; CHECK-SD-NEXT: sshll2.8h v1, v0, #0
54 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
57 ; CHECK-GI-LABEL: func4:
59 ; CHECK-GI-NEXT: sshll.8h v2, v0, #0
60 ; CHECK-GI-NEXT: sshll2.8h v1, v0, #0
61 ; CHECK-GI-NEXT: mov.16b v0, v2
63 %r = sext <16 x i8> %v0 to <16 x i16>
; Zero-extend <4 x i16> to <4 x i32>. Both RUN lines share the CHECK prefix
; here and are expected to emit ushll.4s.
71 define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
72 ; CHECK-LABEL: afunc1:
74 ; CHECK-NEXT: ushll.4s v0, v0, #0
76 %r = zext <4 x i16> %v0 to <4 x i32>
; Sign-extend <4 x i16> to <4 x i32>. Both RUN lines share the CHECK prefix
; here and are expected to emit sshll.4s.
80 define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
81 ; CHECK-LABEL: afunc2:
83 ; CHECK-NEXT: sshll.4s v0, v0, #0
85 %r = sext <4 x i16> %v0 to <4 x i32>
; Zero-extend <8 x i16> to <8 x i32>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
89 define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
90 ; CHECK-SD-LABEL: afunc3:
92 ; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
93 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
96 ; CHECK-GI-LABEL: afunc3:
98 ; CHECK-GI-NEXT: ushll.4s v2, v0, #0
99 ; CHECK-GI-NEXT: ushll2.4s v1, v0, #0
100 ; CHECK-GI-NEXT: mov.16b v0, v2
102 %r = zext <8 x i16> %v0 to <8 x i32>
; Sign-extend <8 x i16> to <8 x i32>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
106 define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
107 ; CHECK-SD-LABEL: afunc4:
108 ; CHECK-SD: // %bb.0:
109 ; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
110 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
113 ; CHECK-GI-LABEL: afunc4:
114 ; CHECK-GI: // %bb.0:
115 ; CHECK-GI-NEXT: sshll.4s v2, v0, #0
116 ; CHECK-GI-NEXT: sshll2.4s v1, v0, #0
117 ; CHECK-GI-NEXT: mov.16b v0, v2
119 %r = sext <8 x i16> %v0 to <8 x i32>
; Zero-extend <8 x i8> to <8 x i32> in two steps: i8 -> i16 (ushll.8h), then
; each half i16 -> i32 (ushll.4s / ushll2.4s). SelectionDAG keeps the
; intermediate in v0; GlobalISel stages it in v1.
123 define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
124 ; CHECK-SD-LABEL: bfunc1:
125 ; CHECK-SD: // %bb.0:
126 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
127 ; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
128 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
131 ; CHECK-GI-LABEL: bfunc1:
132 ; CHECK-GI: // %bb.0:
133 ; CHECK-GI-NEXT: ushll.8h v1, v0, #0
134 ; CHECK-GI-NEXT: ushll.4s v0, v1, #0
135 ; CHECK-GI-NEXT: ushll2.4s v1, v1, #0
137 %r = zext <8 x i8> %v0 to <8 x i32>
; Sign-extend <8 x i8> to <8 x i32> in two steps: i8 -> i16 (sshll.8h), then
; each half i16 -> i32 (sshll.4s / sshll2.4s). SelectionDAG keeps the
; intermediate in v0; GlobalISel stages it in v1.
141 define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
142 ; CHECK-SD-LABEL: bfunc2:
143 ; CHECK-SD: // %bb.0:
144 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
145 ; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
146 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
149 ; CHECK-GI-LABEL: bfunc2:
150 ; CHECK-GI: // %bb.0:
151 ; CHECK-GI-NEXT: sshll.8h v1, v0, #0
152 ; CHECK-GI-NEXT: sshll.4s v0, v1, #0
153 ; CHECK-GI-NEXT: sshll2.4s v1, v1, #0
155 %r = sext <8 x i8> %v0 to <8 x i32>
; Zero-extend <4 x i32> to <4 x i64>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
163 define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
164 ; CHECK-SD-LABEL: zfunc1:
165 ; CHECK-SD: // %bb.0:
166 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
167 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
170 ; CHECK-GI-LABEL: zfunc1:
171 ; CHECK-GI: // %bb.0:
172 ; CHECK-GI-NEXT: ushll.2d v2, v0, #0
173 ; CHECK-GI-NEXT: ushll2.2d v1, v0, #0
174 ; CHECK-GI-NEXT: mov.16b v0, v2
176 %r = zext <4 x i32> %v0 to <4 x i64>
; Sign-extend <4 x i32> to <4 x i64>; the result occupies v0 and v1.
; SelectionDAG (CHECK-SD) widens the high half into v1 first and then the low
; half in place. GlobalISel (CHECK-GI) widens the low half into v2 and needs
; an extra mov.16b to place it in v0.
180 define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
181 ; CHECK-SD-LABEL: zfunc2:
182 ; CHECK-SD: // %bb.0:
183 ; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
184 ; CHECK-SD-NEXT: sshll.2d v0, v0, #0
187 ; CHECK-GI-LABEL: zfunc2:
188 ; CHECK-GI: // %bb.0:
189 ; CHECK-GI-NEXT: sshll.2d v2, v0, #0
190 ; CHECK-GI-NEXT: sshll2.2d v1, v0, #0
191 ; CHECK-GI-NEXT: mov.16b v0, v2
193 %r = sext <4 x i32> %v0 to <4 x i64>
; Zero-extend <4 x i16> to <4 x i64> in two steps: i16 -> i32 (ushll.4s), then
; each half i32 -> i64 (ushll.2d / ushll2.2d). SelectionDAG keeps the
; intermediate in v0; GlobalISel stages it in v1.
197 define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
198 ; CHECK-SD-LABEL: bfunc3:
199 ; CHECK-SD: // %bb.0:
200 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
201 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
202 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
205 ; CHECK-GI-LABEL: bfunc3:
206 ; CHECK-GI: // %bb.0:
207 ; CHECK-GI-NEXT: ushll.4s v1, v0, #0
208 ; CHECK-GI-NEXT: ushll.2d v0, v1, #0
209 ; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
211 %r = zext <4 x i16> %v0 to <4 x i64>
; Sign-extend <4 x i16> to <4 x i64> in two steps: i16 -> i32 (sshll.4s), then
; each half i32 -> i64 (sshll.2d / sshll2.2d). SelectionDAG keeps the
; intermediate in v0; GlobalISel stages it in v1.
215 define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
216 ; CHECK-SD-LABEL: cfunc4:
217 ; CHECK-SD: // %bb.0:
218 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
219 ; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
220 ; CHECK-SD-NEXT: sshll.2d v0, v0, #0
223 ; CHECK-GI-LABEL: cfunc4:
224 ; CHECK-GI: // %bb.0:
225 ; CHECK-GI-NEXT: sshll.4s v1, v0, #0
226 ; CHECK-GI-NEXT: sshll.2d v0, v1, #0
227 ; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
229 %r = sext <4 x i16> %v0 to <4 x i64>
; Zero-extend <4 x i8> to <4 x i64>. The two selectors clear the bits above
; the low byte at different points:
;   - SelectionDAG clears the upper byte of each 16-bit lane (bic.4h) before
;     widening with ushll.
;   - GlobalISel widens first and then masks both result vectors with a
;     movi.2d constant via and.16b.
233 define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
234 ; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
235 ; CHECK-SD: // %bb.0:
236 ; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
237 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
238 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
239 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
242 ; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
243 ; CHECK-GI: // %bb.0:
244 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
245 ; CHECK-GI-NEXT: movi.2d v1, #0x000000000000ff
246 ; CHECK-GI-NEXT: ushll.2d v2, v0, #0
247 ; CHECK-GI-NEXT: ushll2.2d v3, v0, #0
248 ; CHECK-GI-NEXT: and.16b v0, v2, v1
249 ; CHECK-GI-NEXT: and.16b v1, v3, v1
251 %r = zext <4 x i8> %v0 to <4 x i64>
; Sign-extend <4 x i8> to <4 x i64>. Both selectors widen the lanes with
; ushll and then sign-extend in-register with a shl #56 / sshr #56 pair on
; each 2d half; the expected sequences differ only in register assignment
; and instruction order.
255 define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
256 ; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
257 ; CHECK-SD: // %bb.0:
258 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
259 ; CHECK-SD-NEXT: ushll.2d v1, v0, #0
260 ; CHECK-SD-NEXT: ushll2.2d v0, v0, #0
261 ; CHECK-SD-NEXT: shl.2d v0, v0, #56
262 ; CHECK-SD-NEXT: shl.2d v2, v1, #56
263 ; CHECK-SD-NEXT: sshr.2d v1, v0, #56
264 ; CHECK-SD-NEXT: sshr.2d v0, v2, #56
267 ; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
268 ; CHECK-GI: // %bb.0:
269 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
270 ; CHECK-GI-NEXT: ushll.2d v1, v0, #0
271 ; CHECK-GI-NEXT: ushll2.2d v0, v0, #0
272 ; CHECK-GI-NEXT: shl.2d v1, v1, #56
273 ; CHECK-GI-NEXT: shl.2d v2, v0, #56
274 ; CHECK-GI-NEXT: sshr.2d v0, v1, #56
275 ; CHECK-GI-NEXT: sshr.2d v1, v2, #56
277 %r = sext <4 x i8> %v0 to <4 x i64>
; Zero-extend <8 x i8> to <8 x i64>; the result occupies v0-v3. Both
; selectors widen in three levels (8h -> 4s -> 2d) using ushll/ushll2; the
; expected sequences differ in temporary registers and instruction order.
281 define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
282 ; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
283 ; CHECK-SD: // %bb.0:
284 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
285 ; CHECK-SD-NEXT: ushll.4s v1, v0, #0
286 ; CHECK-SD-NEXT: ushll2.4s v2, v0, #0
287 ; CHECK-SD-NEXT: ushll.2d v0, v1, #0
288 ; CHECK-SD-NEXT: ushll2.2d v3, v2, #0
289 ; CHECK-SD-NEXT: ushll2.2d v1, v1, #0
290 ; CHECK-SD-NEXT: ushll.2d v2, v2, #0
293 ; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
294 ; CHECK-GI: // %bb.0:
295 ; CHECK-GI-NEXT: ushll.8h v0, v0, #0
296 ; CHECK-GI-NEXT: ushll.4s v1, v0, #0
297 ; CHECK-GI-NEXT: ushll2.4s v3, v0, #0
298 ; CHECK-GI-NEXT: ushll.2d v0, v1, #0
299 ; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
300 ; CHECK-GI-NEXT: ushll.2d v2, v3, #0
301 ; CHECK-GI-NEXT: ushll2.2d v3, v3, #0
303 %r = zext <8 x i8> %v0 to <8 x i64>
; Sign-extend <8 x i8> to <8 x i64>; the result occupies v0-v3. Both
; selectors widen in three levels (8h -> 4s -> 2d) using sshll/sshll2; the
; expected sequences differ in temporary registers and instruction order.
307 define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
308 ; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
309 ; CHECK-SD: // %bb.0:
310 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
311 ; CHECK-SD-NEXT: sshll.4s v1, v0, #0
312 ; CHECK-SD-NEXT: sshll2.4s v2, v0, #0
313 ; CHECK-SD-NEXT: sshll.2d v0, v1, #0
314 ; CHECK-SD-NEXT: sshll2.2d v3, v2, #0
315 ; CHECK-SD-NEXT: sshll2.2d v1, v1, #0
316 ; CHECK-SD-NEXT: sshll.2d v2, v2, #0
319 ; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
320 ; CHECK-GI: // %bb.0:
321 ; CHECK-GI-NEXT: sshll.8h v0, v0, #0
322 ; CHECK-GI-NEXT: sshll.4s v1, v0, #0
323 ; CHECK-GI-NEXT: sshll2.4s v3, v0, #0
324 ; CHECK-GI-NEXT: sshll.2d v0, v1, #0
325 ; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
326 ; CHECK-GI-NEXT: sshll.2d v2, v3, #0
327 ; CHECK-GI-NEXT: sshll2.2d v3, v3, #0
329 %r = sext <8 x i8> %v0 to <8 x i64>
333 ; Extends of vectors of i1.
; Zero-extend <32 x i1> to <32 x i8>; the result occupies v0 and v1.
; A single CHECK prefix covers both RUN lines: the GlobalISel run reports a
; fallback warning for this function (checked at the top of the file).
; Expected shape: each element is inserted with mov.b, taken from w0-w7 for
; the first eight and from stack loads for the rest, and both vectors are then
; masked with a splat of 1 (movi.16b #1 + and.16b).
335 define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
336 ; CHECK-LABEL: zext_v32i1:
338 ; CHECK-NEXT: ldr w8, [sp, #64]
339 ; CHECK-NEXT: fmov s0, w0
340 ; CHECK-NEXT: ldr w9, [sp, #72]
341 ; CHECK-NEXT: movi.16b v2, #1
342 ; CHECK-NEXT: fmov s1, w8
343 ; CHECK-NEXT: ldr w8, [sp, #80]
344 ; CHECK-NEXT: mov.b v0[1], w1
345 ; CHECK-NEXT: mov.b v1[1], w9
346 ; CHECK-NEXT: ldr w9, [sp]
347 ; CHECK-NEXT: mov.b v0[2], w2
348 ; CHECK-NEXT: mov.b v1[2], w8
349 ; CHECK-NEXT: ldr w8, [sp, #88]
350 ; CHECK-NEXT: mov.b v0[3], w3
351 ; CHECK-NEXT: mov.b v1[3], w8
352 ; CHECK-NEXT: ldr w8, [sp, #96]
353 ; CHECK-NEXT: mov.b v0[4], w4
354 ; CHECK-NEXT: mov.b v1[4], w8
355 ; CHECK-NEXT: ldr w8, [sp, #104]
356 ; CHECK-NEXT: mov.b v0[5], w5
357 ; CHECK-NEXT: mov.b v1[5], w8
358 ; CHECK-NEXT: ldr w8, [sp, #112]
359 ; CHECK-NEXT: mov.b v0[6], w6
360 ; CHECK-NEXT: mov.b v1[6], w8
361 ; CHECK-NEXT: ldr w8, [sp, #120]
362 ; CHECK-NEXT: mov.b v0[7], w7
363 ; CHECK-NEXT: mov.b v1[7], w8
364 ; CHECK-NEXT: ldr w8, [sp, #128]
365 ; CHECK-NEXT: mov.b v0[8], w9
366 ; CHECK-NEXT: ldr w9, [sp, #8]
367 ; CHECK-NEXT: mov.b v1[8], w8
368 ; CHECK-NEXT: ldr w8, [sp, #136]
369 ; CHECK-NEXT: mov.b v0[9], w9
370 ; CHECK-NEXT: ldr w9, [sp, #16]
371 ; CHECK-NEXT: mov.b v1[9], w8
372 ; CHECK-NEXT: ldr w8, [sp, #144]
373 ; CHECK-NEXT: mov.b v0[10], w9
374 ; CHECK-NEXT: ldr w9, [sp, #24]
375 ; CHECK-NEXT: mov.b v1[10], w8
376 ; CHECK-NEXT: ldr w8, [sp, #152]
377 ; CHECK-NEXT: mov.b v0[11], w9
378 ; CHECK-NEXT: ldr w9, [sp, #32]
379 ; CHECK-NEXT: mov.b v1[11], w8
380 ; CHECK-NEXT: ldr w8, [sp, #160]
381 ; CHECK-NEXT: mov.b v0[12], w9
382 ; CHECK-NEXT: ldr w9, [sp, #40]
383 ; CHECK-NEXT: mov.b v1[12], w8
384 ; CHECK-NEXT: ldr w8, [sp, #168]
385 ; CHECK-NEXT: mov.b v0[13], w9
386 ; CHECK-NEXT: ldr w9, [sp, #48]
387 ; CHECK-NEXT: mov.b v1[13], w8
388 ; CHECK-NEXT: ldr w8, [sp, #176]
389 ; CHECK-NEXT: mov.b v0[14], w9
390 ; CHECK-NEXT: ldr w9, [sp, #56]
391 ; CHECK-NEXT: mov.b v1[14], w8
392 ; CHECK-NEXT: ldr w8, [sp, #184]
393 ; CHECK-NEXT: mov.b v0[15], w9
394 ; CHECK-NEXT: mov.b v1[15], w8
395 ; CHECK-NEXT: and.16b v0, v0, v2
396 ; CHECK-NEXT: and.16b v1, v1, v2
398 %res = zext <32 x i1> %arg to <32 x i8>
; Sign-extend <32 x i1> to <32 x i8>; the result occupies v0 and v1.
;   - SelectionDAG (CHECK-SD) assembles two byte vectors lane by lane with
;     mov.b, then turns bit 0 into an all-ones/zero byte via shl #7 + cmlt #0.
;   - GlobalISel (CHECK-GI) assembles 32-bit lanes with mov.s, narrows them
;     with two rounds of uzp1 (8h, then 16b), and finishes with
;     shl #7 + sshr #7.
402 define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
403 ; CHECK-SD-LABEL: sext_v32i1:
404 ; CHECK-SD: // %bb.0:
405 ; CHECK-SD-NEXT: ldr w8, [sp, #64]
406 ; CHECK-SD-NEXT: fmov s1, w0
407 ; CHECK-SD-NEXT: ldr w9, [sp, #72]
408 ; CHECK-SD-NEXT: fmov s0, w8
409 ; CHECK-SD-NEXT: ldr w8, [sp, #80]
410 ; CHECK-SD-NEXT: mov.b v1[1], w1
411 ; CHECK-SD-NEXT: mov.b v0[1], w9
412 ; CHECK-SD-NEXT: ldr w9, [sp]
413 ; CHECK-SD-NEXT: mov.b v1[2], w2
414 ; CHECK-SD-NEXT: mov.b v0[2], w8
415 ; CHECK-SD-NEXT: ldr w8, [sp, #88]
416 ; CHECK-SD-NEXT: mov.b v1[3], w3
417 ; CHECK-SD-NEXT: mov.b v0[3], w8
418 ; CHECK-SD-NEXT: ldr w8, [sp, #96]
419 ; CHECK-SD-NEXT: mov.b v1[4], w4
420 ; CHECK-SD-NEXT: mov.b v0[4], w8
421 ; CHECK-SD-NEXT: ldr w8, [sp, #104]
422 ; CHECK-SD-NEXT: mov.b v1[5], w5
423 ; CHECK-SD-NEXT: mov.b v0[5], w8
424 ; CHECK-SD-NEXT: ldr w8, [sp, #112]
425 ; CHECK-SD-NEXT: mov.b v1[6], w6
426 ; CHECK-SD-NEXT: mov.b v0[6], w8
427 ; CHECK-SD-NEXT: ldr w8, [sp, #120]
428 ; CHECK-SD-NEXT: mov.b v1[7], w7
429 ; CHECK-SD-NEXT: mov.b v0[7], w8
430 ; CHECK-SD-NEXT: ldr w8, [sp, #128]
431 ; CHECK-SD-NEXT: mov.b v1[8], w9
432 ; CHECK-SD-NEXT: ldr w9, [sp, #8]
433 ; CHECK-SD-NEXT: mov.b v0[8], w8
434 ; CHECK-SD-NEXT: ldr w8, [sp, #136]
435 ; CHECK-SD-NEXT: mov.b v1[9], w9
436 ; CHECK-SD-NEXT: ldr w9, [sp, #16]
437 ; CHECK-SD-NEXT: mov.b v0[9], w8
438 ; CHECK-SD-NEXT: ldr w8, [sp, #144]
439 ; CHECK-SD-NEXT: mov.b v1[10], w9
440 ; CHECK-SD-NEXT: ldr w9, [sp, #24]
441 ; CHECK-SD-NEXT: mov.b v0[10], w8
442 ; CHECK-SD-NEXT: ldr w8, [sp, #152]
443 ; CHECK-SD-NEXT: mov.b v1[11], w9
444 ; CHECK-SD-NEXT: ldr w9, [sp, #32]
445 ; CHECK-SD-NEXT: mov.b v0[11], w8
446 ; CHECK-SD-NEXT: ldr w8, [sp, #160]
447 ; CHECK-SD-NEXT: mov.b v1[12], w9
448 ; CHECK-SD-NEXT: ldr w9, [sp, #40]
449 ; CHECK-SD-NEXT: mov.b v0[12], w8
450 ; CHECK-SD-NEXT: ldr w8, [sp, #168]
451 ; CHECK-SD-NEXT: mov.b v1[13], w9
452 ; CHECK-SD-NEXT: ldr w9, [sp, #48]
453 ; CHECK-SD-NEXT: mov.b v0[13], w8
454 ; CHECK-SD-NEXT: ldr w8, [sp, #176]
455 ; CHECK-SD-NEXT: mov.b v1[14], w9
456 ; CHECK-SD-NEXT: ldr w9, [sp, #56]
457 ; CHECK-SD-NEXT: mov.b v0[14], w8
458 ; CHECK-SD-NEXT: ldr w8, [sp, #184]
459 ; CHECK-SD-NEXT: mov.b v1[15], w9
460 ; CHECK-SD-NEXT: mov.b v0[15], w8
461 ; CHECK-SD-NEXT: shl.16b v1, v1, #7
462 ; CHECK-SD-NEXT: shl.16b v2, v0, #7
463 ; CHECK-SD-NEXT: cmlt.16b v0, v1, #0
464 ; CHECK-SD-NEXT: cmlt.16b v1, v2, #0
467 ; CHECK-GI-LABEL: sext_v32i1:
468 ; CHECK-GI: // %bb.0:
469 ; CHECK-GI-NEXT: fmov s17, w0
470 ; CHECK-GI-NEXT: fmov s19, w4
471 ; CHECK-GI-NEXT: ldr s0, [sp]
472 ; CHECK-GI-NEXT: ldr s21, [sp, #8]
473 ; CHECK-GI-NEXT: ldr s1, [sp, #32]
474 ; CHECK-GI-NEXT: ldr s22, [sp, #40]
475 ; CHECK-GI-NEXT: ldr s2, [sp, #64]
476 ; CHECK-GI-NEXT: ldr s23, [sp, #72]
477 ; CHECK-GI-NEXT: ldr s3, [sp, #96]
478 ; CHECK-GI-NEXT: ldr s24, [sp, #104]
479 ; CHECK-GI-NEXT: mov.s v17[1], w1
480 ; CHECK-GI-NEXT: mov.s v19[1], w5
481 ; CHECK-GI-NEXT: ldr s5, [sp, #128]
482 ; CHECK-GI-NEXT: ldr s20, [sp, #136]
483 ; CHECK-GI-NEXT: mov.s v0[1], v21[0]
484 ; CHECK-GI-NEXT: ldr s7, [sp, #160]
485 ; CHECK-GI-NEXT: ldr s25, [sp, #168]
486 ; CHECK-GI-NEXT: mov.s v1[1], v22[0]
487 ; CHECK-GI-NEXT: mov.s v2[1], v23[0]
488 ; CHECK-GI-NEXT: mov.s v3[1], v24[0]
489 ; CHECK-GI-NEXT: mov.s v5[1], v20[0]
490 ; CHECK-GI-NEXT: mov.s v7[1], v25[0]
491 ; CHECK-GI-NEXT: ldr s16, [sp, #16]
492 ; CHECK-GI-NEXT: ldr s18, [sp, #48]
493 ; CHECK-GI-NEXT: ldr s20, [sp, #80]
494 ; CHECK-GI-NEXT: ldr s21, [sp, #112]
495 ; CHECK-GI-NEXT: ldr s22, [sp, #144]
496 ; CHECK-GI-NEXT: ldr s23, [sp, #176]
497 ; CHECK-GI-NEXT: mov.s v17[2], w2
498 ; CHECK-GI-NEXT: mov.s v19[2], w6
499 ; CHECK-GI-NEXT: mov.s v0[2], v16[0]
500 ; CHECK-GI-NEXT: mov.s v1[2], v18[0]
501 ; CHECK-GI-NEXT: mov.s v2[2], v20[0]
502 ; CHECK-GI-NEXT: mov.s v3[2], v21[0]
503 ; CHECK-GI-NEXT: mov.s v5[2], v22[0]
504 ; CHECK-GI-NEXT: mov.s v7[2], v23[0]
505 ; CHECK-GI-NEXT: ldr s4, [sp, #24]
506 ; CHECK-GI-NEXT: ldr s6, [sp, #56]
507 ; CHECK-GI-NEXT: ldr s16, [sp, #88]
508 ; CHECK-GI-NEXT: ldr s18, [sp, #120]
509 ; CHECK-GI-NEXT: ldr s20, [sp, #152]
510 ; CHECK-GI-NEXT: ldr s21, [sp, #184]
511 ; CHECK-GI-NEXT: mov.s v17[3], w3
512 ; CHECK-GI-NEXT: mov.s v19[3], w7
513 ; CHECK-GI-NEXT: mov.s v0[3], v4[0]
514 ; CHECK-GI-NEXT: mov.s v1[3], v6[0]
515 ; CHECK-GI-NEXT: mov.s v2[3], v16[0]
516 ; CHECK-GI-NEXT: mov.s v3[3], v18[0]
517 ; CHECK-GI-NEXT: mov.s v5[3], v20[0]
518 ; CHECK-GI-NEXT: mov.s v7[3], v21[0]
519 ; CHECK-GI-NEXT: uzp1.8h v4, v17, v19
520 ; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
521 ; CHECK-GI-NEXT: uzp1.8h v1, v2, v3
522 ; CHECK-GI-NEXT: uzp1.8h v2, v5, v7
523 ; CHECK-GI-NEXT: uzp1.16b v0, v4, v0
524 ; CHECK-GI-NEXT: uzp1.16b v1, v1, v2
525 ; CHECK-GI-NEXT: shl.16b v0, v0, #7
526 ; CHECK-GI-NEXT: shl.16b v1, v1, #7
527 ; CHECK-GI-NEXT: sshr.16b v0, v0, #7
528 ; CHECK-GI-NEXT: sshr.16b v1, v1, #7
530 %res = sext <32 x i1> %arg to <32 x i8>
; Zero-extend <64 x i1> to <64 x i8>; the result occupies v0-v3.
; A single CHECK prefix covers both RUN lines: the GlobalISel run reports a
; fallback warning for this function (checked at the top of the file).
; Expected shape: x29 is spilled and reloaded around the body, four byte
; vectors are assembled lane by lane with mov.b (w0-w7 for the first eight
; elements, stack loads for the rest), and each vector is masked with a splat
; of 1 (movi.16b #1 + and.16b).
534 define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
535 ; CHECK-LABEL: zext_v64i1:
537 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
538 ; CHECK-NEXT: .cfi_def_cfa_offset 16
539 ; CHECK-NEXT: .cfi_offset w29, -16
540 ; CHECK-NEXT: ldr w8, [sp, #336]
541 ; CHECK-NEXT: ldr w9, [sp, #208]
542 ; CHECK-NEXT: fmov s0, w0
543 ; CHECK-NEXT: ldr w10, [sp, #80]
544 ; CHECK-NEXT: ldr w11, [sp, #216]
545 ; CHECK-NEXT: movi.16b v4, #1
546 ; CHECK-NEXT: fmov s3, w8
547 ; CHECK-NEXT: fmov s2, w9
548 ; CHECK-NEXT: ldr w8, [sp, #344]
549 ; CHECK-NEXT: fmov s1, w10
550 ; CHECK-NEXT: ldr w12, [sp, #88]
551 ; CHECK-NEXT: mov.b v0[1], w1
552 ; CHECK-NEXT: ldr w9, [sp, #224]
553 ; CHECK-NEXT: ldr w10, [sp, #96]
554 ; CHECK-NEXT: mov.b v3[1], w8
555 ; CHECK-NEXT: mov.b v2[1], w11
556 ; CHECK-NEXT: ldr w8, [sp, #352]
557 ; CHECK-NEXT: mov.b v1[1], w12
558 ; CHECK-NEXT: ldr w11, [sp, #144]
559 ; CHECK-NEXT: mov.b v0[2], w2
560 ; CHECK-NEXT: mov.b v3[2], w8
561 ; CHECK-NEXT: mov.b v2[2], w9
562 ; CHECK-NEXT: ldr w8, [sp, #360]
563 ; CHECK-NEXT: mov.b v1[2], w10
564 ; CHECK-NEXT: ldr w9, [sp, #232]
565 ; CHECK-NEXT: ldr w10, [sp, #104]
566 ; CHECK-NEXT: mov.b v0[3], w3
567 ; CHECK-NEXT: mov.b v3[3], w8
568 ; CHECK-NEXT: mov.b v2[3], w9
569 ; CHECK-NEXT: ldr w8, [sp, #368]
570 ; CHECK-NEXT: mov.b v1[3], w10
571 ; CHECK-NEXT: ldr w9, [sp, #240]
572 ; CHECK-NEXT: ldr w10, [sp, #112]
573 ; CHECK-NEXT: mov.b v0[4], w4
574 ; CHECK-NEXT: mov.b v3[4], w8
575 ; CHECK-NEXT: mov.b v2[4], w9
576 ; CHECK-NEXT: ldr w8, [sp, #376]
577 ; CHECK-NEXT: mov.b v1[4], w10
578 ; CHECK-NEXT: ldr w9, [sp, #248]
579 ; CHECK-NEXT: ldr w10, [sp, #120]
580 ; CHECK-NEXT: mov.b v0[5], w5
581 ; CHECK-NEXT: mov.b v3[5], w8
582 ; CHECK-NEXT: mov.b v2[5], w9
583 ; CHECK-NEXT: ldr w8, [sp, #384]
584 ; CHECK-NEXT: mov.b v1[5], w10
585 ; CHECK-NEXT: ldr w9, [sp, #256]
586 ; CHECK-NEXT: ldr w10, [sp, #128]
587 ; CHECK-NEXT: mov.b v0[6], w6
588 ; CHECK-NEXT: mov.b v3[6], w8
589 ; CHECK-NEXT: mov.b v2[6], w9
590 ; CHECK-NEXT: ldr w8, [sp, #392]
591 ; CHECK-NEXT: mov.b v1[6], w10
592 ; CHECK-NEXT: ldr w9, [sp, #264]
593 ; CHECK-NEXT: ldr w10, [sp, #136]
594 ; CHECK-NEXT: mov.b v0[7], w7
595 ; CHECK-NEXT: mov.b v3[7], w8
596 ; CHECK-NEXT: mov.b v2[7], w9
597 ; CHECK-NEXT: ldr w8, [sp, #16]
598 ; CHECK-NEXT: mov.b v1[7], w10
599 ; CHECK-NEXT: ldr w9, [sp, #400]
600 ; CHECK-NEXT: ldr w10, [sp, #272]
601 ; CHECK-NEXT: mov.b v0[8], w8
602 ; CHECK-NEXT: ldr w8, [sp, #24]
603 ; CHECK-NEXT: mov.b v3[8], w9
604 ; CHECK-NEXT: mov.b v2[8], w10
605 ; CHECK-NEXT: ldr w9, [sp, #408]
606 ; CHECK-NEXT: mov.b v1[8], w11
607 ; CHECK-NEXT: ldr w10, [sp, #280]
608 ; CHECK-NEXT: ldr w11, [sp, #152]
609 ; CHECK-NEXT: mov.b v0[9], w8
610 ; CHECK-NEXT: ldr w8, [sp, #32]
611 ; CHECK-NEXT: mov.b v3[9], w9
612 ; CHECK-NEXT: mov.b v2[9], w10
613 ; CHECK-NEXT: ldr w9, [sp, #416]
614 ; CHECK-NEXT: mov.b v1[9], w11
615 ; CHECK-NEXT: ldr w10, [sp, #288]
616 ; CHECK-NEXT: ldr w11, [sp, #160]
617 ; CHECK-NEXT: mov.b v0[10], w8
618 ; CHECK-NEXT: ldr w8, [sp, #40]
619 ; CHECK-NEXT: mov.b v3[10], w9
620 ; CHECK-NEXT: mov.b v2[10], w10
621 ; CHECK-NEXT: ldr w9, [sp, #424]
622 ; CHECK-NEXT: mov.b v1[10], w11
623 ; CHECK-NEXT: ldr w10, [sp, #296]
624 ; CHECK-NEXT: ldr w11, [sp, #168]
625 ; CHECK-NEXT: mov.b v0[11], w8
626 ; CHECK-NEXT: ldr w8, [sp, #48]
627 ; CHECK-NEXT: mov.b v3[11], w9
628 ; CHECK-NEXT: mov.b v2[11], w10
629 ; CHECK-NEXT: ldr w9, [sp, #432]
630 ; CHECK-NEXT: mov.b v1[11], w11
631 ; CHECK-NEXT: ldr w10, [sp, #304]
632 ; CHECK-NEXT: ldr w11, [sp, #176]
633 ; CHECK-NEXT: mov.b v0[12], w8
634 ; CHECK-NEXT: ldr w8, [sp, #56]
635 ; CHECK-NEXT: mov.b v3[12], w9
636 ; CHECK-NEXT: mov.b v2[12], w10
637 ; CHECK-NEXT: ldr w9, [sp, #440]
638 ; CHECK-NEXT: mov.b v1[12], w11
639 ; CHECK-NEXT: ldr w10, [sp, #312]
640 ; CHECK-NEXT: ldr w11, [sp, #184]
641 ; CHECK-NEXT: mov.b v0[13], w8
642 ; CHECK-NEXT: ldr w8, [sp, #64]
643 ; CHECK-NEXT: mov.b v3[13], w9
644 ; CHECK-NEXT: mov.b v2[13], w10
645 ; CHECK-NEXT: ldr w9, [sp, #448]
646 ; CHECK-NEXT: mov.b v1[13], w11
647 ; CHECK-NEXT: ldr w10, [sp, #320]
648 ; CHECK-NEXT: ldr w11, [sp, #192]
649 ; CHECK-NEXT: mov.b v0[14], w8
650 ; CHECK-NEXT: ldr w8, [sp, #72]
651 ; CHECK-NEXT: mov.b v3[14], w9
652 ; CHECK-NEXT: mov.b v2[14], w10
653 ; CHECK-NEXT: ldr w9, [sp, #456]
654 ; CHECK-NEXT: mov.b v1[14], w11
655 ; CHECK-NEXT: ldr w10, [sp, #328]
656 ; CHECK-NEXT: ldr w11, [sp, #200]
657 ; CHECK-NEXT: mov.b v0[15], w8
658 ; CHECK-NEXT: mov.b v3[15], w9
659 ; CHECK-NEXT: mov.b v2[15], w10
660 ; CHECK-NEXT: mov.b v1[15], w11
661 ; CHECK-NEXT: and.16b v0, v0, v4
662 ; CHECK-NEXT: and.16b v2, v2, v4
663 ; CHECK-NEXT: and.16b v3, v3, v4
664 ; CHECK-NEXT: and.16b v1, v1, v4
665 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
667 %res = zext <64 x i1> %arg to <64 x i8>
; Sign-extend <64 x i1> to <64 x i8>; the result occupies v0-v3.
;   - SelectionDAG (CHECK-SD) spills x29, assembles four byte vectors lane by
;     lane with mov.b, then turns bit 0 into an all-ones/zero byte via
;     shl #7 + cmlt #0.
;   - GlobalISel (CHECK-GI) additionally spills d8/d9 (it uses s8 and s9 as
;     temporaries), assembles 32-bit lanes with mov.s, narrows them with two
;     rounds of uzp1 (8h, then 16b), and finishes with shl #7 + sshr #7.
671 define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
672 ; CHECK-SD-LABEL: sext_v64i1:
673 ; CHECK-SD: // %bb.0:
674 ; CHECK-SD-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
675 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
676 ; CHECK-SD-NEXT: .cfi_offset w29, -16
677 ; CHECK-SD-NEXT: ldr w8, [sp, #336]
678 ; CHECK-SD-NEXT: ldr w9, [sp, #208]
679 ; CHECK-SD-NEXT: fmov s2, w0
680 ; CHECK-SD-NEXT: ldr w10, [sp, #80]
681 ; CHECK-SD-NEXT: ldr w11, [sp, #216]
682 ; CHECK-SD-NEXT: ldr w12, [sp, #88]
683 ; CHECK-SD-NEXT: fmov s0, w8
684 ; CHECK-SD-NEXT: fmov s1, w9
685 ; CHECK-SD-NEXT: ldr w8, [sp, #344]
686 ; CHECK-SD-NEXT: fmov s3, w10
687 ; CHECK-SD-NEXT: mov.b v2[1], w1
688 ; CHECK-SD-NEXT: ldr w9, [sp, #224]
689 ; CHECK-SD-NEXT: ldr w10, [sp, #96]
690 ; CHECK-SD-NEXT: mov.b v0[1], w8
691 ; CHECK-SD-NEXT: mov.b v1[1], w11
692 ; CHECK-SD-NEXT: ldr w8, [sp, #352]
693 ; CHECK-SD-NEXT: mov.b v3[1], w12
694 ; CHECK-SD-NEXT: ldr w11, [sp, #144]
695 ; CHECK-SD-NEXT: mov.b v2[2], w2
696 ; CHECK-SD-NEXT: mov.b v0[2], w8
697 ; CHECK-SD-NEXT: mov.b v1[2], w9
698 ; CHECK-SD-NEXT: ldr w8, [sp, #360]
699 ; CHECK-SD-NEXT: mov.b v3[2], w10
700 ; CHECK-SD-NEXT: ldr w9, [sp, #232]
701 ; CHECK-SD-NEXT: ldr w10, [sp, #104]
702 ; CHECK-SD-NEXT: mov.b v2[3], w3
703 ; CHECK-SD-NEXT: mov.b v0[3], w8
704 ; CHECK-SD-NEXT: mov.b v1[3], w9
705 ; CHECK-SD-NEXT: ldr w8, [sp, #368]
706 ; CHECK-SD-NEXT: mov.b v3[3], w10
707 ; CHECK-SD-NEXT: ldr w9, [sp, #240]
708 ; CHECK-SD-NEXT: ldr w10, [sp, #112]
709 ; CHECK-SD-NEXT: mov.b v2[4], w4
710 ; CHECK-SD-NEXT: mov.b v0[4], w8
711 ; CHECK-SD-NEXT: mov.b v1[4], w9
712 ; CHECK-SD-NEXT: ldr w8, [sp, #376]
713 ; CHECK-SD-NEXT: mov.b v3[4], w10
714 ; CHECK-SD-NEXT: ldr w9, [sp, #248]
715 ; CHECK-SD-NEXT: ldr w10, [sp, #120]
716 ; CHECK-SD-NEXT: mov.b v2[5], w5
717 ; CHECK-SD-NEXT: mov.b v0[5], w8
718 ; CHECK-SD-NEXT: mov.b v1[5], w9
719 ; CHECK-SD-NEXT: ldr w8, [sp, #384]
720 ; CHECK-SD-NEXT: mov.b v3[5], w10
721 ; CHECK-SD-NEXT: ldr w9, [sp, #256]
722 ; CHECK-SD-NEXT: ldr w10, [sp, #128]
723 ; CHECK-SD-NEXT: mov.b v2[6], w6
724 ; CHECK-SD-NEXT: mov.b v0[6], w8
725 ; CHECK-SD-NEXT: mov.b v1[6], w9
726 ; CHECK-SD-NEXT: ldr w8, [sp, #392]
727 ; CHECK-SD-NEXT: mov.b v3[6], w10
728 ; CHECK-SD-NEXT: ldr w9, [sp, #264]
729 ; CHECK-SD-NEXT: ldr w10, [sp, #136]
730 ; CHECK-SD-NEXT: mov.b v2[7], w7
731 ; CHECK-SD-NEXT: mov.b v0[7], w8
732 ; CHECK-SD-NEXT: mov.b v1[7], w9
733 ; CHECK-SD-NEXT: ldr w8, [sp, #16]
734 ; CHECK-SD-NEXT: mov.b v3[7], w10
735 ; CHECK-SD-NEXT: ldr w9, [sp, #400]
736 ; CHECK-SD-NEXT: ldr w10, [sp, #272]
737 ; CHECK-SD-NEXT: mov.b v2[8], w8
738 ; CHECK-SD-NEXT: ldr w8, [sp, #24]
739 ; CHECK-SD-NEXT: mov.b v0[8], w9
740 ; CHECK-SD-NEXT: mov.b v1[8], w10
741 ; CHECK-SD-NEXT: ldr w9, [sp, #408]
742 ; CHECK-SD-NEXT: mov.b v3[8], w11
743 ; CHECK-SD-NEXT: ldr w10, [sp, #280]
744 ; CHECK-SD-NEXT: ldr w11, [sp, #152]
745 ; CHECK-SD-NEXT: mov.b v2[9], w8
746 ; CHECK-SD-NEXT: ldr w8, [sp, #32]
747 ; CHECK-SD-NEXT: mov.b v0[9], w9
748 ; CHECK-SD-NEXT: mov.b v1[9], w10
749 ; CHECK-SD-NEXT: ldr w9, [sp, #416]
750 ; CHECK-SD-NEXT: mov.b v3[9], w11
751 ; CHECK-SD-NEXT: ldr w10, [sp, #288]
752 ; CHECK-SD-NEXT: ldr w11, [sp, #160]
753 ; CHECK-SD-NEXT: mov.b v2[10], w8
754 ; CHECK-SD-NEXT: ldr w8, [sp, #40]
755 ; CHECK-SD-NEXT: mov.b v0[10], w9
756 ; CHECK-SD-NEXT: mov.b v1[10], w10
757 ; CHECK-SD-NEXT: ldr w9, [sp, #424]
758 ; CHECK-SD-NEXT: mov.b v3[10], w11
759 ; CHECK-SD-NEXT: ldr w10, [sp, #296]
760 ; CHECK-SD-NEXT: ldr w11, [sp, #168]
761 ; CHECK-SD-NEXT: mov.b v2[11], w8
762 ; CHECK-SD-NEXT: ldr w8, [sp, #48]
763 ; CHECK-SD-NEXT: mov.b v0[11], w9
764 ; CHECK-SD-NEXT: mov.b v1[11], w10
765 ; CHECK-SD-NEXT: ldr w9, [sp, #432]
766 ; CHECK-SD-NEXT: mov.b v3[11], w11
767 ; CHECK-SD-NEXT: ldr w10, [sp, #304]
768 ; CHECK-SD-NEXT: ldr w11, [sp, #176]
769 ; CHECK-SD-NEXT: mov.b v2[12], w8
770 ; CHECK-SD-NEXT: ldr w8, [sp, #56]
771 ; CHECK-SD-NEXT: mov.b v0[12], w9
772 ; CHECK-SD-NEXT: mov.b v1[12], w10
773 ; CHECK-SD-NEXT: ldr w9, [sp, #440]
774 ; CHECK-SD-NEXT: mov.b v3[12], w11
775 ; CHECK-SD-NEXT: ldr w10, [sp, #312]
776 ; CHECK-SD-NEXT: ldr w11, [sp, #184]
777 ; CHECK-SD-NEXT: mov.b v2[13], w8
778 ; CHECK-SD-NEXT: ldr w8, [sp, #64]
779 ; CHECK-SD-NEXT: mov.b v0[13], w9
780 ; CHECK-SD-NEXT: mov.b v1[13], w10
781 ; CHECK-SD-NEXT: ldr w9, [sp, #448]
782 ; CHECK-SD-NEXT: mov.b v3[13], w11
783 ; CHECK-SD-NEXT: ldr w10, [sp, #320]
784 ; CHECK-SD-NEXT: ldr w11, [sp, #192]
785 ; CHECK-SD-NEXT: mov.b v2[14], w8
786 ; CHECK-SD-NEXT: ldr w8, [sp, #72]
787 ; CHECK-SD-NEXT: mov.b v0[14], w9
788 ; CHECK-SD-NEXT: mov.b v1[14], w10
789 ; CHECK-SD-NEXT: ldr w9, [sp, #456]
790 ; CHECK-SD-NEXT: mov.b v3[14], w11
791 ; CHECK-SD-NEXT: ldr w10, [sp, #328]
792 ; CHECK-SD-NEXT: ldr w11, [sp, #200]
793 ; CHECK-SD-NEXT: mov.b v2[15], w8
794 ; CHECK-SD-NEXT: mov.b v0[15], w9
795 ; CHECK-SD-NEXT: mov.b v1[15], w10
796 ; CHECK-SD-NEXT: mov.b v3[15], w11
797 ; CHECK-SD-NEXT: shl.16b v2, v2, #7
798 ; CHECK-SD-NEXT: shl.16b v4, v1, #7
799 ; CHECK-SD-NEXT: shl.16b v5, v0, #7
800 ; CHECK-SD-NEXT: shl.16b v3, v3, #7
801 ; CHECK-SD-NEXT: cmlt.16b v0, v2, #0
802 ; CHECK-SD-NEXT: cmlt.16b v2, v4, #0
803 ; CHECK-SD-NEXT: cmlt.16b v1, v3, #0
804 ; CHECK-SD-NEXT: cmlt.16b v3, v5, #0
805 ; CHECK-SD-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
808 ; CHECK-GI-LABEL: sext_v64i1:
809 ; CHECK-GI: // %bb.0:
810 ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
811 ; CHECK-GI-NEXT: str x29, [sp, #16] // 8-byte Folded Spill
812 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
813 ; CHECK-GI-NEXT: .cfi_offset w29, -16
814 ; CHECK-GI-NEXT: .cfi_offset b8, -24
815 ; CHECK-GI-NEXT: .cfi_offset b9, -32
816 ; CHECK-GI-NEXT: ldr s0, [sp, #32]
817 ; CHECK-GI-NEXT: ldr s4, [sp, #40]
818 ; CHECK-GI-NEXT: ldr s2, [sp, #96]
819 ; CHECK-GI-NEXT: ldr s5, [sp, #104]
820 ; CHECK-GI-NEXT: ldr s1, [sp, #64]
821 ; CHECK-GI-NEXT: ldr s23, [sp, #72]
822 ; CHECK-GI-NEXT: mov.s v0[1], v4[0]
823 ; CHECK-GI-NEXT: ldr s28, [sp, #200]
824 ; CHECK-GI-NEXT: ldr s3, [sp, #128]
825 ; CHECK-GI-NEXT: mov.s v2[1], v5[0]
826 ; CHECK-GI-NEXT: mov.s v1[1], v23[0]
827 ; CHECK-GI-NEXT: ldr s5, [sp, #192]
828 ; CHECK-GI-NEXT: ldr s7, [sp, #136]
829 ; CHECK-GI-NEXT: ldr s4, [sp, #160]
830 ; CHECK-GI-NEXT: ldr s24, [sp, #168]
831 ; CHECK-GI-NEXT: mov.s v5[1], v28[0]
832 ; CHECK-GI-NEXT: ldr s6, [sp, #48]
833 ; CHECK-GI-NEXT: ldr s21, [sp, #80]
834 ; CHECK-GI-NEXT: mov.s v3[1], v7[0]
835 ; CHECK-GI-NEXT: mov.s v4[1], v24[0]
836 ; CHECK-GI-NEXT: ldr s16, [sp, #112]
837 ; CHECK-GI-NEXT: ldr s29, [sp, #208]
838 ; CHECK-GI-NEXT: mov.s v0[2], v6[0]
839 ; CHECK-GI-NEXT: mov.s v1[2], v21[0]
840 ; CHECK-GI-NEXT: ldr s6, [sp, #224]
841 ; CHECK-GI-NEXT: ldr s30, [sp, #232]
842 ; CHECK-GI-NEXT: mov.s v2[2], v16[0]
843 ; CHECK-GI-NEXT: ldr s20, [sp, #144]
844 ; CHECK-GI-NEXT: ldr s27, [sp, #176]
845 ; CHECK-GI-NEXT: mov.s v5[2], v29[0]
846 ; CHECK-GI-NEXT: mov.s v6[1], v30[0]
847 ; CHECK-GI-NEXT: ldr s18, [sp, #88]
848 ; CHECK-GI-NEXT: ldr s19, [sp, #120]
849 ; CHECK-GI-NEXT: ldr s7, [sp, #256]
850 ; CHECK-GI-NEXT: ldr s31, [sp, #264]
851 ; CHECK-GI-NEXT: mov.s v3[2], v20[0]
852 ; CHECK-GI-NEXT: mov.s v4[2], v27[0]
853 ; CHECK-GI-NEXT: ldr s25, [sp, #216]
854 ; CHECK-GI-NEXT: ldr s26, [sp, #240]
855 ; CHECK-GI-NEXT: ldr s17, [sp, #56]
856 ; CHECK-GI-NEXT: ldr s22, [sp, #152]
857 ; CHECK-GI-NEXT: mov.s v1[3], v18[0]
858 ; CHECK-GI-NEXT: ldr s23, [sp, #184]
859 ; CHECK-GI-NEXT: mov.s v2[3], v19[0]
860 ; CHECK-GI-NEXT: ldr s18, [sp, #320]
861 ; CHECK-GI-NEXT: ldr s27, [sp, #328]
862 ; CHECK-GI-NEXT: mov.s v7[1], v31[0]
863 ; CHECK-GI-NEXT: ldr s19, [sp, #352]
864 ; CHECK-GI-NEXT: ldr s29, [sp, #360]
865 ; CHECK-GI-NEXT: mov.s v5[3], v25[0]
866 ; CHECK-GI-NEXT: mov.s v6[2], v26[0]
867 ; CHECK-GI-NEXT: fmov s25, w0
868 ; CHECK-GI-NEXT: fmov s26, w4
869 ; CHECK-GI-NEXT: ldr s28, [sp, #272]
870 ; CHECK-GI-NEXT: mov.s v0[3], v17[0]
871 ; CHECK-GI-NEXT: ldr s17, [sp, #288]
872 ; CHECK-GI-NEXT: ldr s8, [sp, #296]
873 ; CHECK-GI-NEXT: mov.s v3[3], v22[0]
874 ; CHECK-GI-NEXT: ldr s20, [sp, #384]
875 ; CHECK-GI-NEXT: mov.s v4[3], v23[0]
876 ; CHECK-GI-NEXT: ldr s30, [sp, #392]
877 ; CHECK-GI-NEXT: ldr s22, [sp, #416]
878 ; CHECK-GI-NEXT: ldr s31, [sp, #424]
879 ; CHECK-GI-NEXT: ldr s23, [sp, #448]
880 ; CHECK-GI-NEXT: mov.s v18[1], v27[0]
881 ; CHECK-GI-NEXT: mov.s v19[1], v29[0]
882 ; CHECK-GI-NEXT: ldr s27, [sp, #456]
883 ; CHECK-GI-NEXT: ldr s24, [sp, #336]
884 ; CHECK-GI-NEXT: mov.s v17[1], v8[0]
885 ; CHECK-GI-NEXT: mov.s v7[2], v28[0]
886 ; CHECK-GI-NEXT: mov.s v25[1], w1
887 ; CHECK-GI-NEXT: mov.s v26[1], w5
888 ; CHECK-GI-NEXT: mov.s v20[1], v30[0]
889 ; CHECK-GI-NEXT: ldr s28, [sp, #368]
890 ; CHECK-GI-NEXT: mov.s v22[1], v31[0]
891 ; CHECK-GI-NEXT: mov.s v23[1], v27[0]
892 ; CHECK-GI-NEXT: ldr s9, [sp, #304]
893 ; CHECK-GI-NEXT: ldr s27, [sp, #400]
894 ; CHECK-GI-NEXT: mov.s v18[2], v24[0]
895 ; CHECK-GI-NEXT: ldr s24, [sp, #432]
896 ; CHECK-GI-NEXT: mov.s v19[2], v28[0]
897 ; CHECK-GI-NEXT: ldr s28, [sp, #464]
898 ; CHECK-GI-NEXT: ldr s16, [sp, #248]
899 ; CHECK-GI-NEXT: ldr s21, [sp, #280]
900 ; CHECK-GI-NEXT: mov.s v17[2], v9[0]
901 ; CHECK-GI-NEXT: mov.s v25[2], w2
902 ; CHECK-GI-NEXT: mov.s v26[2], w6
903 ; CHECK-GI-NEXT: mov.s v20[2], v27[0]
904 ; CHECK-GI-NEXT: mov.s v22[2], v24[0]
905 ; CHECK-GI-NEXT: mov.s v23[2], v28[0]
906 ; CHECK-GI-NEXT: ldr s29, [sp, #312]
907 ; CHECK-GI-NEXT: ldr s27, [sp, #344]
908 ; CHECK-GI-NEXT: ldr s24, [sp, #376]
909 ; CHECK-GI-NEXT: ldr s28, [sp, #408]
910 ; CHECK-GI-NEXT: mov.s v6[3], v16[0]
911 ; CHECK-GI-NEXT: ldr s16, [sp, #440]
912 ; CHECK-GI-NEXT: mov.s v7[3], v21[0]
913 ; CHECK-GI-NEXT: ldr s21, [sp, #472]
914 ; CHECK-GI-NEXT: mov.s v25[3], w3
915 ; CHECK-GI-NEXT: mov.s v26[3], w7
916 ; CHECK-GI-NEXT: mov.s v17[3], v29[0]
917 ; CHECK-GI-NEXT: mov.s v18[3], v27[0]
918 ; CHECK-GI-NEXT: mov.s v19[3], v24[0]
919 ; CHECK-GI-NEXT: mov.s v20[3], v28[0]
920 ; CHECK-GI-NEXT: mov.s v22[3], v16[0]
921 ; CHECK-GI-NEXT: mov.s v23[3], v21[0]
922 ; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
923 ; CHECK-GI-NEXT: uzp1.8h v1, v2, v3
924 ; CHECK-GI-NEXT: uzp1.8h v2, v4, v5
925 ; CHECK-GI-NEXT: uzp1.8h v3, v6, v7
926 ; CHECK-GI-NEXT: ldr x29, [sp, #16] // 8-byte Folded Reload
927 ; CHECK-GI-NEXT: uzp1.8h v16, v25, v26
928 ; CHECK-GI-NEXT: uzp1.8h v4, v17, v18
929 ; CHECK-GI-NEXT: uzp1.8h v5, v19, v20
930 ; CHECK-GI-NEXT: uzp1.8h v6, v22, v23
931 ; CHECK-GI-NEXT: uzp1.16b v1, v1, v2
932 ; CHECK-GI-NEXT: uzp1.16b v0, v16, v0
933 ; CHECK-GI-NEXT: uzp1.16b v2, v3, v4
934 ; CHECK-GI-NEXT: uzp1.16b v3, v5, v6
935 ; CHECK-GI-NEXT: shl.16b v1, v1, #7
936 ; CHECK-GI-NEXT: shl.16b v0, v0, #7
937 ; CHECK-GI-NEXT: shl.16b v2, v2, #7
938 ; CHECK-GI-NEXT: shl.16b v3, v3, #7
939 ; CHECK-GI-NEXT: sshr.16b v1, v1, #7
940 ; CHECK-GI-NEXT: sshr.16b v0, v0, #7
941 ; CHECK-GI-NEXT: sshr.16b v2, v2, #7
942 ; CHECK-GI-NEXT: sshr.16b v3, v3, #7
943 ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
945 %res = sext <64 x i1> %arg to <64 x i8>
949 ; X0 & X1 are the real return registers; SDAG also writes v0, for unknown reasons.
; Sign-extend <1 x i64> to <1 x i128>. Both selectors move d0 to a general
; register and compute x1 with asr #63. SelectionDAG additionally inserts x1
; into lane 1 of v0 before copying d0 to x0; GlobalISel copies d0 to x0
; directly.
950 define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
951 ; CHECK-SD-LABEL: sext_v1x64:
952 ; CHECK-SD: // %bb.0:
953 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
954 ; CHECK-SD-NEXT: fmov x8, d0
955 ; CHECK-SD-NEXT: asr x1, x8, #63
956 ; CHECK-SD-NEXT: mov.d v0[1], x1
957 ; CHECK-SD-NEXT: fmov x0, d0
960 ; CHECK-GI-LABEL: sext_v1x64:
961 ; CHECK-GI: // %bb.0:
962 ; CHECK-GI-NEXT: fmov x8, d0
963 ; CHECK-GI-NEXT: fmov x0, d0
964 ; CHECK-GI-NEXT: asr x1, x8, #63
966 %res = sext <1 x i64> %arg to <1 x i128>