1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; Test efficient codegen of vector zero/sign extends from a legal source type
6 ; up to 128-bit, 256-bit and wider (e.g. 512-bit) result vector types.
8 ; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
9 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v32i1
10 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1
11 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v64i1
; Zero-extend <8 x i8> to <8 x i16>. The result fits in a single q register and
; is expected to lower to one ushll.8h with a zero shift under both selectors.
17 define <8 x i16> @func1(<8 x i8> %v0) nounwind {
20 ; CHECK-NEXT: ushll.8h v0, v0, #0
22 %r = zext <8 x i8> %v0 to <8 x i16>
; Sign-extend <8 x i8> to <8 x i16>. Signed counterpart of func1: expected to
; lower to one sshll.8h with a zero shift under both selectors.
26 define <8 x i16> @func2(<8 x i8> %v0) nounwind {
29 ; CHECK-NEXT: sshll.8h v0, v0, #0
31 %r = sext <8 x i8> %v0 to <8 x i16>
; Zero-extend <16 x i8> to <16 x i16>; the result occupies two q registers
; (v0 = low half, v1 = high half). ushll widens the low half and ushll2 the
; high half. SelectionDAG writes both results directly into v0/v1, whereas
; GlobalISel builds the low half in v2 and copies it into v0 afterwards.
35 define <16 x i16> @func3(<16 x i8> %v0) nounwind {
36 ; CHECK-SD-LABEL: func3:
38 ; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
39 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
42 ; CHECK-GI-LABEL: func3:
44 ; CHECK-GI-NEXT: ushll.8h v2, v0, #0
45 ; CHECK-GI-NEXT: ushll2.8h v1, v0, #0
46 ; CHECK-GI-NEXT: mov.16b v0, v2
48 %r = zext <16 x i8> %v0 to <16 x i16>
; Sign-extend <16 x i8> to <16 x i16>; signed counterpart of func3 using
; sshll/sshll2. SelectionDAG writes v1 then v0 directly; GlobalISel goes
; through v2 for the low half and needs an extra mov.16b into v0.
52 define <16 x i16> @func4(<16 x i8> %v0) nounwind {
53 ; CHECK-SD-LABEL: func4:
55 ; CHECK-SD-NEXT: sshll2.8h v1, v0, #0
56 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
59 ; CHECK-GI-LABEL: func4:
61 ; CHECK-GI-NEXT: sshll.8h v2, v0, #0
62 ; CHECK-GI-NEXT: sshll2.8h v1, v0, #0
63 ; CHECK-GI-NEXT: mov.16b v0, v2
65 %r = sext <16 x i8> %v0 to <16 x i16>
; Zero-extend <4 x i16> to <4 x i32>. Expected to lower to a single ushll.4s
; with a zero shift under both selectors.
73 define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
74 ; CHECK-LABEL: afunc1:
76 ; CHECK-NEXT: ushll.4s v0, v0, #0
78 %r = zext <4 x i16> %v0 to <4 x i32>
; Sign-extend <4 x i16> to <4 x i32>. Expected to lower to a single sshll.4s
; with a zero shift under both selectors.
82 define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
83 ; CHECK-LABEL: afunc2:
85 ; CHECK-NEXT: sshll.4s v0, v0, #0
87 %r = sext <4 x i16> %v0 to <4 x i32>
; Zero-extend <8 x i16> to <8 x i32>; the result occupies v0 (low half) and
; v1 (high half). ushll.4s widens the low half and ushll2.4s the high half.
; GlobalISel stages the low half in v2 and then copies it into v0.
91 define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
92 ; CHECK-SD-LABEL: afunc3:
94 ; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
95 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
98 ; CHECK-GI-LABEL: afunc3:
100 ; CHECK-GI-NEXT: ushll.4s v2, v0, #0
101 ; CHECK-GI-NEXT: ushll2.4s v1, v0, #0
102 ; CHECK-GI-NEXT: mov.16b v0, v2
104 %r = zext <8 x i16> %v0 to <8 x i32>
; Sign-extend <8 x i16> to <8 x i32>; signed counterpart of afunc3 using
; sshll.4s/sshll2.4s. GlobalISel stages the low half in v2 and then copies
; it into v0, costing one extra mov.16b compared with SelectionDAG.
108 define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
109 ; CHECK-SD-LABEL: afunc4:
110 ; CHECK-SD: // %bb.0:
111 ; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
112 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
115 ; CHECK-GI-LABEL: afunc4:
116 ; CHECK-GI: // %bb.0:
117 ; CHECK-GI-NEXT: sshll.4s v2, v0, #0
118 ; CHECK-GI-NEXT: sshll2.4s v1, v0, #0
119 ; CHECK-GI-NEXT: mov.16b v0, v2
121 %r = sext <8 x i16> %v0 to <8 x i32>
; Zero-extend <8 x i8> to <8 x i32>, a two-step widening: first to 8h with
; ushll.8h, then split into two 4s halves with ushll.4s/ushll2.4s. Both
; selectors need three instructions; they differ only in which register
; holds the intermediate 8h value (v0 for SelectionDAG, v1 for GlobalISel).
125 define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
126 ; CHECK-SD-LABEL: bfunc1:
127 ; CHECK-SD: // %bb.0:
128 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
129 ; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
130 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
133 ; CHECK-GI-LABEL: bfunc1:
134 ; CHECK-GI: // %bb.0:
135 ; CHECK-GI-NEXT: ushll.8h v1, v0, #0
136 ; CHECK-GI-NEXT: ushll.4s v0, v1, #0
137 ; CHECK-GI-NEXT: ushll2.4s v1, v1, #0
139 %r = zext <8 x i8> %v0 to <8 x i32>
; Sign-extend <8 x i8> to <8 x i32>; signed counterpart of bfunc1. Widens to
; 8h with sshll.8h, then splits into two 4s halves with sshll.4s/sshll2.4s.
; The selectors differ only in the register used for the intermediate value.
143 define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
144 ; CHECK-SD-LABEL: bfunc2:
145 ; CHECK-SD: // %bb.0:
146 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
147 ; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
148 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
151 ; CHECK-GI-LABEL: bfunc2:
152 ; CHECK-GI: // %bb.0:
153 ; CHECK-GI-NEXT: sshll.8h v1, v0, #0
154 ; CHECK-GI-NEXT: sshll.4s v0, v1, #0
155 ; CHECK-GI-NEXT: sshll2.4s v1, v1, #0
157 %r = sext <8 x i8> %v0 to <8 x i32>
; Zero-extend <4 x i32> to <4 x i64>; the result occupies v0 (low half) and
; v1 (high half), produced by ushll.2d and ushll2.2d. GlobalISel stages the
; low half in v2 and then copies it into v0 with an extra mov.16b.
165 define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
166 ; CHECK-SD-LABEL: zfunc1:
167 ; CHECK-SD: // %bb.0:
168 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
169 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
172 ; CHECK-GI-LABEL: zfunc1:
173 ; CHECK-GI: // %bb.0:
174 ; CHECK-GI-NEXT: ushll.2d v2, v0, #0
175 ; CHECK-GI-NEXT: ushll2.2d v1, v0, #0
176 ; CHECK-GI-NEXT: mov.16b v0, v2
178 %r = zext <4 x i32> %v0 to <4 x i64>
; Sign-extend <4 x i32> to <4 x i64>; signed counterpart of zfunc1 using
; sshll.2d/sshll2.2d. GlobalISel stages the low half in v2 and then copies
; it into v0 with an extra mov.16b.
182 define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
183 ; CHECK-SD-LABEL: zfunc2:
184 ; CHECK-SD: // %bb.0:
185 ; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
186 ; CHECK-SD-NEXT: sshll.2d v0, v0, #0
189 ; CHECK-GI-LABEL: zfunc2:
190 ; CHECK-GI: // %bb.0:
191 ; CHECK-GI-NEXT: sshll.2d v2, v0, #0
192 ; CHECK-GI-NEXT: sshll2.2d v1, v0, #0
193 ; CHECK-GI-NEXT: mov.16b v0, v2
195 %r = sext <4 x i32> %v0 to <4 x i64>
; Zero-extend <4 x i16> to <4 x i64>, a two-step widening: first to 4s with
; ushll.4s, then split into two 2d halves with ushll.2d/ushll2.2d. The
; selectors differ only in the register holding the intermediate 4s value.
199 define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
200 ; CHECK-SD-LABEL: bfunc3:
201 ; CHECK-SD: // %bb.0:
202 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
203 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
204 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
207 ; CHECK-GI-LABEL: bfunc3:
208 ; CHECK-GI: // %bb.0:
209 ; CHECK-GI-NEXT: ushll.4s v1, v0, #0
210 ; CHECK-GI-NEXT: ushll.2d v0, v1, #0
211 ; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
213 %r = zext <4 x i16> %v0 to <4 x i64>
; Sign-extend <4 x i16> to <4 x i64>; signed counterpart of bfunc3. Widens
; to 4s with sshll.4s, then splits into two 2d halves with
; sshll.2d/sshll2.2d. The selectors differ only in register assignment.
217 define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
218 ; CHECK-SD-LABEL: cfunc4:
219 ; CHECK-SD: // %bb.0:
220 ; CHECK-SD-NEXT: sshll.4s v0, v0, #0
221 ; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
222 ; CHECK-SD-NEXT: sshll.2d v0, v0, #0
225 ; CHECK-GI-LABEL: cfunc4:
226 ; CHECK-GI: // %bb.0:
227 ; CHECK-GI-NEXT: sshll.4s v1, v0, #0
228 ; CHECK-GI-NEXT: sshll.2d v0, v1, #0
229 ; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
231 %r = sext <4 x i16> %v0 to <4 x i64>
; Zero-extend <4 x i8> to <4 x i64>. The expected code treats the incoming
; value as 16-bit lanes (it starts with .4h / ushll.4s operations), so the
; bits above the low byte of each lane must be cleared explicitly.
;  - SelectionDAG clears the upper byte of every halfword first
;    (bic.4h #255, lsl #8) and then widens with the ushll/ushll2 chain.
;  - GlobalISel widens first and then masks both 2d results with a constant
;    loaded from the constant pool (adrp/ldr + two and.16b).
235 define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
236 ; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
237 ; CHECK-SD: // %bb.0:
238 ; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
239 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
240 ; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
241 ; CHECK-SD-NEXT: ushll.2d v0, v0, #0
244 ; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
245 ; CHECK-GI: // %bb.0:
246 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
247 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0
248 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI14_0]
249 ; CHECK-GI-NEXT: ushll.2d v1, v0, #0
250 ; CHECK-GI-NEXT: ushll2.2d v2, v0, #0
251 ; CHECK-GI-NEXT: and.16b v0, v1, v3
252 ; CHECK-GI-NEXT: and.16b v1, v2, v3
254 %r = zext <4 x i8> %v0 to <4 x i64>
; Sign-extend <4 x i8> to <4 x i64>. Both selectors widen the lanes to 64 bits
; with the unsigned ushll/ushll2 chain and then sign-extend in place from
; bit 7 using a shl #56 / sshr #56 pair on each 2d half. The two sequences
; differ only in register assignment and instruction order.
258 define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
259 ; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
260 ; CHECK-SD: // %bb.0:
261 ; CHECK-SD-NEXT: ushll.4s v0, v0, #0
262 ; CHECK-SD-NEXT: ushll.2d v1, v0, #0
263 ; CHECK-SD-NEXT: ushll2.2d v0, v0, #0
264 ; CHECK-SD-NEXT: shl.2d v0, v0, #56
265 ; CHECK-SD-NEXT: shl.2d v2, v1, #56
266 ; CHECK-SD-NEXT: sshr.2d v1, v0, #56
267 ; CHECK-SD-NEXT: sshr.2d v0, v2, #56
270 ; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
271 ; CHECK-GI: // %bb.0:
272 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
273 ; CHECK-GI-NEXT: ushll.2d v1, v0, #0
274 ; CHECK-GI-NEXT: ushll2.2d v0, v0, #0
275 ; CHECK-GI-NEXT: shl.2d v1, v1, #56
276 ; CHECK-GI-NEXT: shl.2d v2, v0, #56
277 ; CHECK-GI-NEXT: sshr.2d v0, v1, #56
278 ; CHECK-GI-NEXT: sshr.2d v1, v2, #56
280 %r = sext <4 x i8> %v0 to <4 x i64>
; Zero-extend <8 x i8> to <8 x i64>; the result occupies four q registers
; (v0-v3). Expected lowering is a three-level widening tree: 8b -> 8h
; (one ushll), 8h -> 2 x 4s (ushll/ushll2), 4s -> 4 x 2d (two ushll/ushll2
; pairs), seven instructions in total. The selectors differ only in register
; assignment and in the order of the final four widenings.
284 define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
285 ; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
286 ; CHECK-SD: // %bb.0:
287 ; CHECK-SD-NEXT: ushll.8h v0, v0, #0
288 ; CHECK-SD-NEXT: ushll.4s v1, v0, #0
289 ; CHECK-SD-NEXT: ushll2.4s v2, v0, #0
290 ; CHECK-SD-NEXT: ushll.2d v0, v1, #0
291 ; CHECK-SD-NEXT: ushll2.2d v3, v2, #0
292 ; CHECK-SD-NEXT: ushll2.2d v1, v1, #0
293 ; CHECK-SD-NEXT: ushll.2d v2, v2, #0
296 ; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
297 ; CHECK-GI: // %bb.0:
298 ; CHECK-GI-NEXT: ushll.8h v0, v0, #0
299 ; CHECK-GI-NEXT: ushll.4s v1, v0, #0
300 ; CHECK-GI-NEXT: ushll2.4s v3, v0, #0
301 ; CHECK-GI-NEXT: ushll.2d v0, v1, #0
302 ; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
303 ; CHECK-GI-NEXT: ushll.2d v2, v3, #0
304 ; CHECK-GI-NEXT: ushll2.2d v3, v3, #0
306 %r = zext <8 x i8> %v0 to <8 x i64>
; Sign-extend <8 x i8> to <8 x i64>; signed counterpart of
; zext_v8i8_to_v8i64. The same three-level widening tree is expected, built
; from sshll/sshll2 instead of ushll/ushll2, with the result in v0-v3. The
; selectors differ only in register assignment and instruction order.
310 define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
311 ; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
312 ; CHECK-SD: // %bb.0:
313 ; CHECK-SD-NEXT: sshll.8h v0, v0, #0
314 ; CHECK-SD-NEXT: sshll.4s v1, v0, #0
315 ; CHECK-SD-NEXT: sshll2.4s v2, v0, #0
316 ; CHECK-SD-NEXT: sshll.2d v0, v1, #0
317 ; CHECK-SD-NEXT: sshll2.2d v3, v2, #0
318 ; CHECK-SD-NEXT: sshll2.2d v1, v1, #0
319 ; CHECK-SD-NEXT: sshll.2d v2, v2, #0
322 ; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
323 ; CHECK-GI: // %bb.0:
324 ; CHECK-GI-NEXT: sshll.8h v0, v0, #0
325 ; CHECK-GI-NEXT: sshll.4s v1, v0, #0
326 ; CHECK-GI-NEXT: sshll2.4s v3, v0, #0
327 ; CHECK-GI-NEXT: sshll.2d v0, v1, #0
328 ; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
329 ; CHECK-GI-NEXT: sshll.2d v2, v3, #0
330 ; CHECK-GI-NEXT: sshll2.2d v3, v3, #0
332 %r = sext <8 x i8> %v0 to <8 x i64>
336 ; Extends of vectors of i1.
; Zero-extend <32 x i1> to <32 x i8>. In the expected code the 32 elements
; arrive as scalars: the first eight in w0-w7 and the remaining ones in
; 8-byte-spaced stack slots ([sp] .. [sp, #184]). Two 16-byte vectors (v0 and
; v1) are assembled lane by lane with fmov/mov.b inserts, and each byte is
; then reduced to 0 or 1 by and-ing with a splat of 1 (movi.16b #1).
; The GlobalISel RUN line is expected to fall back to SelectionDAG for this
; function (see the fallback warnings checked at the top of the file), so one
; shared set of assertions covers both RUN lines.
338 define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
339 ; CHECK-LABEL: zext_v32i1:
341 ; CHECK-NEXT: ldr w8, [sp, #64]
342 ; CHECK-NEXT: fmov s0, w0
343 ; CHECK-NEXT: ldr w9, [sp, #72]
344 ; CHECK-NEXT: movi.16b v2, #1
345 ; CHECK-NEXT: fmov s1, w8
346 ; CHECK-NEXT: ldr w8, [sp, #80]
347 ; CHECK-NEXT: mov.b v0[1], w1
348 ; CHECK-NEXT: mov.b v1[1], w9
349 ; CHECK-NEXT: ldr w9, [sp]
350 ; CHECK-NEXT: mov.b v0[2], w2
351 ; CHECK-NEXT: mov.b v1[2], w8
352 ; CHECK-NEXT: ldr w8, [sp, #88]
353 ; CHECK-NEXT: mov.b v0[3], w3
354 ; CHECK-NEXT: mov.b v1[3], w8
355 ; CHECK-NEXT: ldr w8, [sp, #96]
356 ; CHECK-NEXT: mov.b v0[4], w4
357 ; CHECK-NEXT: mov.b v1[4], w8
358 ; CHECK-NEXT: ldr w8, [sp, #104]
359 ; CHECK-NEXT: mov.b v0[5], w5
360 ; CHECK-NEXT: mov.b v1[5], w8
361 ; CHECK-NEXT: ldr w8, [sp, #112]
362 ; CHECK-NEXT: mov.b v0[6], w6
363 ; CHECK-NEXT: mov.b v1[6], w8
364 ; CHECK-NEXT: ldr w8, [sp, #120]
365 ; CHECK-NEXT: mov.b v0[7], w7
366 ; CHECK-NEXT: mov.b v1[7], w8
367 ; CHECK-NEXT: ldr w8, [sp, #128]
368 ; CHECK-NEXT: mov.b v0[8], w9
369 ; CHECK-NEXT: ldr w9, [sp, #8]
370 ; CHECK-NEXT: mov.b v1[8], w8
371 ; CHECK-NEXT: ldr w8, [sp, #136]
372 ; CHECK-NEXT: mov.b v0[9], w9
373 ; CHECK-NEXT: ldr w9, [sp, #16]
374 ; CHECK-NEXT: mov.b v1[9], w8
375 ; CHECK-NEXT: ldr w8, [sp, #144]
376 ; CHECK-NEXT: mov.b v0[10], w9
377 ; CHECK-NEXT: ldr w9, [sp, #24]
378 ; CHECK-NEXT: mov.b v1[10], w8
379 ; CHECK-NEXT: ldr w8, [sp, #152]
380 ; CHECK-NEXT: mov.b v0[11], w9
381 ; CHECK-NEXT: ldr w9, [sp, #32]
382 ; CHECK-NEXT: mov.b v1[11], w8
383 ; CHECK-NEXT: ldr w8, [sp, #160]
384 ; CHECK-NEXT: mov.b v0[12], w9
385 ; CHECK-NEXT: ldr w9, [sp, #40]
386 ; CHECK-NEXT: mov.b v1[12], w8
387 ; CHECK-NEXT: ldr w8, [sp, #168]
388 ; CHECK-NEXT: mov.b v0[13], w9
389 ; CHECK-NEXT: ldr w9, [sp, #48]
390 ; CHECK-NEXT: mov.b v1[13], w8
391 ; CHECK-NEXT: ldr w8, [sp, #176]
392 ; CHECK-NEXT: mov.b v0[14], w9
393 ; CHECK-NEXT: ldr w9, [sp, #56]
394 ; CHECK-NEXT: mov.b v1[14], w8
395 ; CHECK-NEXT: ldr w8, [sp, #184]
396 ; CHECK-NEXT: mov.b v0[15], w9
397 ; CHECK-NEXT: mov.b v1[15], w8
398 ; CHECK-NEXT: and.16b v0, v0, v2
399 ; CHECK-NEXT: and.16b v1, v1, v2
401 %res = zext <32 x i1> %arg to <32 x i8>
; Sign-extend <32 x i1> to <32 x i8>. The vectors are assembled from w0-w7
; and stack slots exactly as in zext_v32i1, but each byte is then turned into
; an all-zeros/all-ones mask: shl #7 moves bit 0 into the sign bit and
; cmlt #0 tests that sign bit. The results end up in v0 (first 16 elements)
; and v1 (last 16 elements).
; The GlobalISel RUN line is expected to fall back to SelectionDAG for this
; function (see the fallback warnings checked at the top of the file), so one
; shared set of assertions covers both RUN lines.
405 define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
406 ; CHECK-LABEL: sext_v32i1:
408 ; CHECK-NEXT: ldr w8, [sp, #64]
409 ; CHECK-NEXT: fmov s1, w0
410 ; CHECK-NEXT: ldr w9, [sp, #72]
411 ; CHECK-NEXT: fmov s0, w8
412 ; CHECK-NEXT: ldr w8, [sp, #80]
413 ; CHECK-NEXT: mov.b v1[1], w1
414 ; CHECK-NEXT: mov.b v0[1], w9
415 ; CHECK-NEXT: ldr w9, [sp]
416 ; CHECK-NEXT: mov.b v1[2], w2
417 ; CHECK-NEXT: mov.b v0[2], w8
418 ; CHECK-NEXT: ldr w8, [sp, #88]
419 ; CHECK-NEXT: mov.b v1[3], w3
420 ; CHECK-NEXT: mov.b v0[3], w8
421 ; CHECK-NEXT: ldr w8, [sp, #96]
422 ; CHECK-NEXT: mov.b v1[4], w4
423 ; CHECK-NEXT: mov.b v0[4], w8
424 ; CHECK-NEXT: ldr w8, [sp, #104]
425 ; CHECK-NEXT: mov.b v1[5], w5
426 ; CHECK-NEXT: mov.b v0[5], w8
427 ; CHECK-NEXT: ldr w8, [sp, #112]
428 ; CHECK-NEXT: mov.b v1[6], w6
429 ; CHECK-NEXT: mov.b v0[6], w8
430 ; CHECK-NEXT: ldr w8, [sp, #120]
431 ; CHECK-NEXT: mov.b v1[7], w7
432 ; CHECK-NEXT: mov.b v0[7], w8
433 ; CHECK-NEXT: ldr w8, [sp, #128]
434 ; CHECK-NEXT: mov.b v1[8], w9
435 ; CHECK-NEXT: ldr w9, [sp, #8]
436 ; CHECK-NEXT: mov.b v0[8], w8
437 ; CHECK-NEXT: ldr w8, [sp, #136]
438 ; CHECK-NEXT: mov.b v1[9], w9
439 ; CHECK-NEXT: ldr w9, [sp, #16]
440 ; CHECK-NEXT: mov.b v0[9], w8
441 ; CHECK-NEXT: ldr w8, [sp, #144]
442 ; CHECK-NEXT: mov.b v1[10], w9
443 ; CHECK-NEXT: ldr w9, [sp, #24]
444 ; CHECK-NEXT: mov.b v0[10], w8
445 ; CHECK-NEXT: ldr w8, [sp, #152]
446 ; CHECK-NEXT: mov.b v1[11], w9
447 ; CHECK-NEXT: ldr w9, [sp, #32]
448 ; CHECK-NEXT: mov.b v0[11], w8
449 ; CHECK-NEXT: ldr w8, [sp, #160]
450 ; CHECK-NEXT: mov.b v1[12], w9
451 ; CHECK-NEXT: ldr w9, [sp, #40]
452 ; CHECK-NEXT: mov.b v0[12], w8
453 ; CHECK-NEXT: ldr w8, [sp, #168]
454 ; CHECK-NEXT: mov.b v1[13], w9
455 ; CHECK-NEXT: ldr w9, [sp, #48]
456 ; CHECK-NEXT: mov.b v0[13], w8
457 ; CHECK-NEXT: ldr w8, [sp, #176]
458 ; CHECK-NEXT: mov.b v1[14], w9
459 ; CHECK-NEXT: ldr w9, [sp, #56]
460 ; CHECK-NEXT: mov.b v0[14], w8
461 ; CHECK-NEXT: ldr w8, [sp, #184]
462 ; CHECK-NEXT: mov.b v1[15], w9
463 ; CHECK-NEXT: mov.b v0[15], w8
464 ; CHECK-NEXT: shl.16b v1, v1, #7
465 ; CHECK-NEXT: shl.16b v2, v0, #7
466 ; CHECK-NEXT: cmlt.16b v0, v1, #0
467 ; CHECK-NEXT: cmlt.16b v1, v2, #0
469 %res = sext <32 x i1> %arg to <32 x i8>
; Zero-extend <64 x i1> to <64 x i8>. Same strategy as zext_v32i1 scaled to
; four 16-byte vectors (v0-v3): the first eight elements come from w0-w7, the
; rest are loaded from 8-byte-spaced stack slots, each vector is built with
; fmov/mov.b lane inserts, and every byte is masked with a splat of 1 held in
; v4. The expected code also saves and restores x29 around the body (with the
; matching .cfi directives), which shifts every stack offset by 16.
; The GlobalISel RUN line is expected to fall back to SelectionDAG for this
; function (see the fallback warnings checked at the top of the file), so one
; shared set of assertions covers both RUN lines.
473 define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
474 ; CHECK-LABEL: zext_v64i1:
476 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
477 ; CHECK-NEXT: .cfi_def_cfa_offset 16
478 ; CHECK-NEXT: .cfi_offset w29, -16
479 ; CHECK-NEXT: ldr w8, [sp, #336]
480 ; CHECK-NEXT: ldr w9, [sp, #208]
481 ; CHECK-NEXT: fmov s0, w0
482 ; CHECK-NEXT: ldr w10, [sp, #80]
483 ; CHECK-NEXT: ldr w11, [sp, #216]
484 ; CHECK-NEXT: movi.16b v4, #1
485 ; CHECK-NEXT: fmov s3, w8
486 ; CHECK-NEXT: fmov s2, w9
487 ; CHECK-NEXT: ldr w8, [sp, #344]
488 ; CHECK-NEXT: fmov s1, w10
489 ; CHECK-NEXT: ldr w12, [sp, #88]
490 ; CHECK-NEXT: mov.b v0[1], w1
491 ; CHECK-NEXT: ldr w9, [sp, #224]
492 ; CHECK-NEXT: ldr w10, [sp, #96]
493 ; CHECK-NEXT: mov.b v3[1], w8
494 ; CHECK-NEXT: mov.b v2[1], w11
495 ; CHECK-NEXT: ldr w8, [sp, #352]
496 ; CHECK-NEXT: mov.b v1[1], w12
497 ; CHECK-NEXT: ldr w11, [sp, #144]
498 ; CHECK-NEXT: mov.b v0[2], w2
499 ; CHECK-NEXT: mov.b v3[2], w8
500 ; CHECK-NEXT: mov.b v2[2], w9
501 ; CHECK-NEXT: ldr w8, [sp, #360]
502 ; CHECK-NEXT: mov.b v1[2], w10
503 ; CHECK-NEXT: ldr w9, [sp, #232]
504 ; CHECK-NEXT: ldr w10, [sp, #104]
505 ; CHECK-NEXT: mov.b v0[3], w3
506 ; CHECK-NEXT: mov.b v3[3], w8
507 ; CHECK-NEXT: mov.b v2[3], w9
508 ; CHECK-NEXT: ldr w8, [sp, #368]
509 ; CHECK-NEXT: mov.b v1[3], w10
510 ; CHECK-NEXT: ldr w9, [sp, #240]
511 ; CHECK-NEXT: ldr w10, [sp, #112]
512 ; CHECK-NEXT: mov.b v0[4], w4
513 ; CHECK-NEXT: mov.b v3[4], w8
514 ; CHECK-NEXT: mov.b v2[4], w9
515 ; CHECK-NEXT: ldr w8, [sp, #376]
516 ; CHECK-NEXT: mov.b v1[4], w10
517 ; CHECK-NEXT: ldr w9, [sp, #248]
518 ; CHECK-NEXT: ldr w10, [sp, #120]
519 ; CHECK-NEXT: mov.b v0[5], w5
520 ; CHECK-NEXT: mov.b v3[5], w8
521 ; CHECK-NEXT: mov.b v2[5], w9
522 ; CHECK-NEXT: ldr w8, [sp, #384]
523 ; CHECK-NEXT: mov.b v1[5], w10
524 ; CHECK-NEXT: ldr w9, [sp, #256]
525 ; CHECK-NEXT: ldr w10, [sp, #128]
526 ; CHECK-NEXT: mov.b v0[6], w6
527 ; CHECK-NEXT: mov.b v3[6], w8
528 ; CHECK-NEXT: mov.b v2[6], w9
529 ; CHECK-NEXT: ldr w8, [sp, #392]
530 ; CHECK-NEXT: mov.b v1[6], w10
531 ; CHECK-NEXT: ldr w9, [sp, #264]
532 ; CHECK-NEXT: ldr w10, [sp, #136]
533 ; CHECK-NEXT: mov.b v0[7], w7
534 ; CHECK-NEXT: mov.b v3[7], w8
535 ; CHECK-NEXT: mov.b v2[7], w9
536 ; CHECK-NEXT: ldr w8, [sp, #16]
537 ; CHECK-NEXT: mov.b v1[7], w10
538 ; CHECK-NEXT: ldr w9, [sp, #400]
539 ; CHECK-NEXT: ldr w10, [sp, #272]
540 ; CHECK-NEXT: mov.b v0[8], w8
541 ; CHECK-NEXT: ldr w8, [sp, #24]
542 ; CHECK-NEXT: mov.b v3[8], w9
543 ; CHECK-NEXT: mov.b v2[8], w10
544 ; CHECK-NEXT: ldr w9, [sp, #408]
545 ; CHECK-NEXT: mov.b v1[8], w11
546 ; CHECK-NEXT: ldr w10, [sp, #280]
547 ; CHECK-NEXT: ldr w11, [sp, #152]
548 ; CHECK-NEXT: mov.b v0[9], w8
549 ; CHECK-NEXT: ldr w8, [sp, #32]
550 ; CHECK-NEXT: mov.b v3[9], w9
551 ; CHECK-NEXT: mov.b v2[9], w10
552 ; CHECK-NEXT: ldr w9, [sp, #416]
553 ; CHECK-NEXT: mov.b v1[9], w11
554 ; CHECK-NEXT: ldr w10, [sp, #288]
555 ; CHECK-NEXT: ldr w11, [sp, #160]
556 ; CHECK-NEXT: mov.b v0[10], w8
557 ; CHECK-NEXT: ldr w8, [sp, #40]
558 ; CHECK-NEXT: mov.b v3[10], w9
559 ; CHECK-NEXT: mov.b v2[10], w10
560 ; CHECK-NEXT: ldr w9, [sp, #424]
561 ; CHECK-NEXT: mov.b v1[10], w11
562 ; CHECK-NEXT: ldr w10, [sp, #296]
563 ; CHECK-NEXT: ldr w11, [sp, #168]
564 ; CHECK-NEXT: mov.b v0[11], w8
565 ; CHECK-NEXT: ldr w8, [sp, #48]
566 ; CHECK-NEXT: mov.b v3[11], w9
567 ; CHECK-NEXT: mov.b v2[11], w10
568 ; CHECK-NEXT: ldr w9, [sp, #432]
569 ; CHECK-NEXT: mov.b v1[11], w11
570 ; CHECK-NEXT: ldr w10, [sp, #304]
571 ; CHECK-NEXT: ldr w11, [sp, #176]
572 ; CHECK-NEXT: mov.b v0[12], w8
573 ; CHECK-NEXT: ldr w8, [sp, #56]
574 ; CHECK-NEXT: mov.b v3[12], w9
575 ; CHECK-NEXT: mov.b v2[12], w10
576 ; CHECK-NEXT: ldr w9, [sp, #440]
577 ; CHECK-NEXT: mov.b v1[12], w11
578 ; CHECK-NEXT: ldr w10, [sp, #312]
579 ; CHECK-NEXT: ldr w11, [sp, #184]
580 ; CHECK-NEXT: mov.b v0[13], w8
581 ; CHECK-NEXT: ldr w8, [sp, #64]
582 ; CHECK-NEXT: mov.b v3[13], w9
583 ; CHECK-NEXT: mov.b v2[13], w10
584 ; CHECK-NEXT: ldr w9, [sp, #448]
585 ; CHECK-NEXT: mov.b v1[13], w11
586 ; CHECK-NEXT: ldr w10, [sp, #320]
587 ; CHECK-NEXT: ldr w11, [sp, #192]
588 ; CHECK-NEXT: mov.b v0[14], w8
589 ; CHECK-NEXT: ldr w8, [sp, #72]
590 ; CHECK-NEXT: mov.b v3[14], w9
591 ; CHECK-NEXT: mov.b v2[14], w10
592 ; CHECK-NEXT: ldr w9, [sp, #456]
593 ; CHECK-NEXT: mov.b v1[14], w11
594 ; CHECK-NEXT: ldr w10, [sp, #328]
595 ; CHECK-NEXT: ldr w11, [sp, #200]
596 ; CHECK-NEXT: mov.b v0[15], w8
597 ; CHECK-NEXT: mov.b v3[15], w9
598 ; CHECK-NEXT: mov.b v2[15], w10
599 ; CHECK-NEXT: mov.b v1[15], w11
600 ; CHECK-NEXT: and.16b v0, v0, v4
601 ; CHECK-NEXT: and.16b v2, v2, v4
602 ; CHECK-NEXT: and.16b v3, v3, v4
603 ; CHECK-NEXT: and.16b v1, v1, v4
604 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
606 %res = zext <64 x i1> %arg to <64 x i8>
; Sign-extend <64 x i1> to <64 x i8>. The four 16-byte vectors are assembled
; from w0-w7 and stack slots exactly as in zext_v64i1 (including the x29
; save/restore and .cfi directives), but each byte is then expanded into an
; all-zeros/all-ones mask: shl #7 moves bit 0 into the sign bit and cmlt #0
; tests that sign bit. The final results are placed in v0-v3.
; The GlobalISel RUN line is expected to fall back to SelectionDAG for this
; function (see the fallback warnings checked at the top of the file), so one
; shared set of assertions covers both RUN lines.
610 define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
611 ; CHECK-LABEL: sext_v64i1:
613 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
614 ; CHECK-NEXT: .cfi_def_cfa_offset 16
615 ; CHECK-NEXT: .cfi_offset w29, -16
616 ; CHECK-NEXT: ldr w8, [sp, #336]
617 ; CHECK-NEXT: ldr w9, [sp, #208]
618 ; CHECK-NEXT: fmov s2, w0
619 ; CHECK-NEXT: ldr w10, [sp, #80]
620 ; CHECK-NEXT: ldr w11, [sp, #216]
621 ; CHECK-NEXT: ldr w12, [sp, #88]
622 ; CHECK-NEXT: fmov s0, w8
623 ; CHECK-NEXT: fmov s1, w9
624 ; CHECK-NEXT: ldr w8, [sp, #344]
625 ; CHECK-NEXT: fmov s3, w10
626 ; CHECK-NEXT: mov.b v2[1], w1
627 ; CHECK-NEXT: ldr w9, [sp, #224]
628 ; CHECK-NEXT: ldr w10, [sp, #96]
629 ; CHECK-NEXT: mov.b v0[1], w8
630 ; CHECK-NEXT: mov.b v1[1], w11
631 ; CHECK-NEXT: ldr w8, [sp, #352]
632 ; CHECK-NEXT: mov.b v3[1], w12
633 ; CHECK-NEXT: ldr w11, [sp, #144]
634 ; CHECK-NEXT: mov.b v2[2], w2
635 ; CHECK-NEXT: mov.b v0[2], w8
636 ; CHECK-NEXT: mov.b v1[2], w9
637 ; CHECK-NEXT: ldr w8, [sp, #360]
638 ; CHECK-NEXT: mov.b v3[2], w10
639 ; CHECK-NEXT: ldr w9, [sp, #232]
640 ; CHECK-NEXT: ldr w10, [sp, #104]
641 ; CHECK-NEXT: mov.b v2[3], w3
642 ; CHECK-NEXT: mov.b v0[3], w8
643 ; CHECK-NEXT: mov.b v1[3], w9
644 ; CHECK-NEXT: ldr w8, [sp, #368]
645 ; CHECK-NEXT: mov.b v3[3], w10
646 ; CHECK-NEXT: ldr w9, [sp, #240]
647 ; CHECK-NEXT: ldr w10, [sp, #112]
648 ; CHECK-NEXT: mov.b v2[4], w4
649 ; CHECK-NEXT: mov.b v0[4], w8
650 ; CHECK-NEXT: mov.b v1[4], w9
651 ; CHECK-NEXT: ldr w8, [sp, #376]
652 ; CHECK-NEXT: mov.b v3[4], w10
653 ; CHECK-NEXT: ldr w9, [sp, #248]
654 ; CHECK-NEXT: ldr w10, [sp, #120]
655 ; CHECK-NEXT: mov.b v2[5], w5
656 ; CHECK-NEXT: mov.b v0[5], w8
657 ; CHECK-NEXT: mov.b v1[5], w9
658 ; CHECK-NEXT: ldr w8, [sp, #384]
659 ; CHECK-NEXT: mov.b v3[5], w10
660 ; CHECK-NEXT: ldr w9, [sp, #256]
661 ; CHECK-NEXT: ldr w10, [sp, #128]
662 ; CHECK-NEXT: mov.b v2[6], w6
663 ; CHECK-NEXT: mov.b v0[6], w8
664 ; CHECK-NEXT: mov.b v1[6], w9
665 ; CHECK-NEXT: ldr w8, [sp, #392]
666 ; CHECK-NEXT: mov.b v3[6], w10
667 ; CHECK-NEXT: ldr w9, [sp, #264]
668 ; CHECK-NEXT: ldr w10, [sp, #136]
669 ; CHECK-NEXT: mov.b v2[7], w7
670 ; CHECK-NEXT: mov.b v0[7], w8
671 ; CHECK-NEXT: mov.b v1[7], w9
672 ; CHECK-NEXT: ldr w8, [sp, #16]
673 ; CHECK-NEXT: mov.b v3[7], w10
674 ; CHECK-NEXT: ldr w9, [sp, #400]
675 ; CHECK-NEXT: ldr w10, [sp, #272]
676 ; CHECK-NEXT: mov.b v2[8], w8
677 ; CHECK-NEXT: ldr w8, [sp, #24]
678 ; CHECK-NEXT: mov.b v0[8], w9
679 ; CHECK-NEXT: mov.b v1[8], w10
680 ; CHECK-NEXT: ldr w9, [sp, #408]
681 ; CHECK-NEXT: mov.b v3[8], w11
682 ; CHECK-NEXT: ldr w10, [sp, #280]
683 ; CHECK-NEXT: ldr w11, [sp, #152]
684 ; CHECK-NEXT: mov.b v2[9], w8
685 ; CHECK-NEXT: ldr w8, [sp, #32]
686 ; CHECK-NEXT: mov.b v0[9], w9
687 ; CHECK-NEXT: mov.b v1[9], w10
688 ; CHECK-NEXT: ldr w9, [sp, #416]
689 ; CHECK-NEXT: mov.b v3[9], w11
690 ; CHECK-NEXT: ldr w10, [sp, #288]
691 ; CHECK-NEXT: ldr w11, [sp, #160]
692 ; CHECK-NEXT: mov.b v2[10], w8
693 ; CHECK-NEXT: ldr w8, [sp, #40]
694 ; CHECK-NEXT: mov.b v0[10], w9
695 ; CHECK-NEXT: mov.b v1[10], w10
696 ; CHECK-NEXT: ldr w9, [sp, #424]
697 ; CHECK-NEXT: mov.b v3[10], w11
698 ; CHECK-NEXT: ldr w10, [sp, #296]
699 ; CHECK-NEXT: ldr w11, [sp, #168]
700 ; CHECK-NEXT: mov.b v2[11], w8
701 ; CHECK-NEXT: ldr w8, [sp, #48]
702 ; CHECK-NEXT: mov.b v0[11], w9
703 ; CHECK-NEXT: mov.b v1[11], w10
704 ; CHECK-NEXT: ldr w9, [sp, #432]
705 ; CHECK-NEXT: mov.b v3[11], w11
706 ; CHECK-NEXT: ldr w10, [sp, #304]
707 ; CHECK-NEXT: ldr w11, [sp, #176]
708 ; CHECK-NEXT: mov.b v2[12], w8
709 ; CHECK-NEXT: ldr w8, [sp, #56]
710 ; CHECK-NEXT: mov.b v0[12], w9
711 ; CHECK-NEXT: mov.b v1[12], w10
712 ; CHECK-NEXT: ldr w9, [sp, #440]
713 ; CHECK-NEXT: mov.b v3[12], w11
714 ; CHECK-NEXT: ldr w10, [sp, #312]
715 ; CHECK-NEXT: ldr w11, [sp, #184]
716 ; CHECK-NEXT: mov.b v2[13], w8
717 ; CHECK-NEXT: ldr w8, [sp, #64]
718 ; CHECK-NEXT: mov.b v0[13], w9
719 ; CHECK-NEXT: mov.b v1[13], w10
720 ; CHECK-NEXT: ldr w9, [sp, #448]
721 ; CHECK-NEXT: mov.b v3[13], w11
722 ; CHECK-NEXT: ldr w10, [sp, #320]
723 ; CHECK-NEXT: ldr w11, [sp, #192]
724 ; CHECK-NEXT: mov.b v2[14], w8
725 ; CHECK-NEXT: ldr w8, [sp, #72]
726 ; CHECK-NEXT: mov.b v0[14], w9
727 ; CHECK-NEXT: mov.b v1[14], w10
728 ; CHECK-NEXT: ldr w9, [sp, #456]
729 ; CHECK-NEXT: mov.b v3[14], w11
730 ; CHECK-NEXT: ldr w10, [sp, #328]
731 ; CHECK-NEXT: ldr w11, [sp, #200]
732 ; CHECK-NEXT: mov.b v2[15], w8
733 ; CHECK-NEXT: mov.b v0[15], w9
734 ; CHECK-NEXT: mov.b v1[15], w10
735 ; CHECK-NEXT: mov.b v3[15], w11
736 ; CHECK-NEXT: shl.16b v2, v2, #7
737 ; CHECK-NEXT: shl.16b v4, v1, #7
738 ; CHECK-NEXT: shl.16b v5, v0, #7
739 ; CHECK-NEXT: shl.16b v3, v3, #7
740 ; CHECK-NEXT: cmlt.16b v0, v2, #0
741 ; CHECK-NEXT: cmlt.16b v2, v4, #0
742 ; CHECK-NEXT: cmlt.16b v1, v3, #0
743 ; CHECK-NEXT: cmlt.16b v3, v5, #0
744 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
746 %res = sext <64 x i1> %arg to <64 x i8>
750 ; x0 and x1 are the actual return registers; SelectionDAG additionally writes the high half into v0 (mov.d v0[1], x1) for unknown reasons.
; Sign-extend <1 x i64> to <1 x i128>. In the expected code the input arrives
; in d0 and the 128-bit result is produced in a pair of general-purpose
; registers: x0 holds the original 64-bit value and x1 holds its sign
; replicated across all bits (asr #63). SelectionDAG also inserts x1 into
; lane 1 of v0 before reading x0 back out of d0; GlobalISel emits only the two
; fmov instructions and the asr.
751 define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
752 ; CHECK-SD-LABEL: sext_v1x64:
753 ; CHECK-SD: // %bb.0:
754 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
755 ; CHECK-SD-NEXT: fmov x8, d0
756 ; CHECK-SD-NEXT: asr x1, x8, #63
757 ; CHECK-SD-NEXT: mov.d v0[1], x1
758 ; CHECK-SD-NEXT: fmov x0, d0
761 ; CHECK-GI-LABEL: sext_v1x64:
762 ; CHECK-GI: // %bb.0:
763 ; CHECK-GI-NEXT: fmov x8, d0
764 ; CHECK-GI-NEXT: fmov x0, d0
765 ; CHECK-GI-NEXT: asr x1, x8, #63
767 %res = sext <1 x i64> %arg to <1 x i128>