1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
; Extracts element 0 of a <vscale x 16 x i8>.
; Expected code: a single `fmov w0, s0`.
6 define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
7 ; CHECK-LABEL: test_lane0_16xi8:
9 ; CHECK-NEXT: fmov w0, s0
11 %b = extractelement <vscale x 16 x i8> %a, i32 0
; Extracts element 15 of a <vscale x 16 x i8>, the last i8 lane within the
; first 128 bits. Expected code: `umov w0, v0.b[15]`, which reads the lane
; through the v0 register name.
15 define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
16 ; CHECK-LABEL: test_lane15_16xi8:
18 ; CHECK-NEXT: umov w0, v0.b[15]
20 %b = extractelement <vscale x 16 x i8> %a, i32 15
; Extracts element 16 of a <vscale x 16 x i8>, the first i8 lane past the
; first 128 bits. Expected code: an indexed `mov z0.b, z0.b[16]` followed by
; `fmov w0, s0`.
24 define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
25 ; CHECK-LABEL: test_lane16_16xi8:
27 ; CHECK-NEXT: mov z0.b, z0.b[16]
28 ; CHECK-NEXT: fmov w0, s0
30 %b = extractelement <vscale x 16 x i8> %a, i32 16
; Extracts element 0 of a <vscale x 8 x i16>.
; Expected code: a single `fmov w0, s0`.
34 define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
35 ; CHECK-LABEL: test_lane0_8xi16:
37 ; CHECK-NEXT: fmov w0, s0
39 %b = extractelement <vscale x 8 x i16> %a, i32 0
; Extracts element 7 of a <vscale x 8 x i16>, the last i16 lane within the
; first 128 bits. Expected code: `umov w0, v0.h[7]`.
43 define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
44 ; CHECK-LABEL: test_lane7_8xi16:
46 ; CHECK-NEXT: umov w0, v0.h[7]
48 %b = extractelement <vscale x 8 x i16> %a, i32 7
; Extracts element 8 of a <vscale x 8 x i16>, the first i16 lane past the
; first 128 bits. Expected code: an indexed `mov z0.h, z0.h[8]` followed by
; `fmov w0, s0`.
52 define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
53 ; CHECK-LABEL: test_lane8_8xi16:
55 ; CHECK-NEXT: mov z0.h, z0.h[8]
56 ; CHECK-NEXT: fmov w0, s0
58 %b = extractelement <vscale x 8 x i16> %a, i32 8
; Extracts element 0 of a <vscale x 4 x i32>.
; Expected code: a single `fmov w0, s0`.
62 define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
63 ; CHECK-LABEL: test_lane0_4xi32:
65 ; CHECK-NEXT: fmov w0, s0
67 %b = extractelement <vscale x 4 x i32> %a, i32 0
; Extracts element 3 of a <vscale x 4 x i32>, the last i32 lane within the
; first 128 bits. Expected code: `mov w0, v0.s[3]`.
71 define i32 @test_lane3_4xi32(<vscale x 4 x i32> %a) #0 {
72 ; CHECK-LABEL: test_lane3_4xi32:
74 ; CHECK-NEXT: mov w0, v0.s[3]
76 %b = extractelement <vscale x 4 x i32> %a, i32 3
; Extracts element 4 of a <vscale x 4 x i32>, the first i32 lane past the
; first 128 bits. Expected code: an indexed `mov z0.s, z0.s[4]` followed by
; `fmov w0, s0`.
80 define i32 @test_lane4_4xi32(<vscale x 4 x i32> %a) #0 {
81 ; CHECK-LABEL: test_lane4_4xi32:
83 ; CHECK-NEXT: mov z0.s, z0.s[4]
84 ; CHECK-NEXT: fmov w0, s0
86 %b = extractelement <vscale x 4 x i32> %a, i32 4
; Extracts element 0 of a <vscale x 2 x i64>.
; Expected code: a single `fmov x0, d0`.
90 define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) #0 {
91 ; CHECK-LABEL: test_lane0_2xi64:
93 ; CHECK-NEXT: fmov x0, d0
95 %b = extractelement <vscale x 2 x i64> %a, i32 0
; Extracts element 1 of a <vscale x 2 x i64>, the last i64 lane within the
; first 128 bits. Expected code: `mov x0, v0.d[1]`.
99 define i64 @test_lane1_2xi64(<vscale x 2 x i64> %a) #0 {
100 ; CHECK-LABEL: test_lane1_2xi64:
102 ; CHECK-NEXT: mov x0, v0.d[1]
104 %b = extractelement <vscale x 2 x i64> %a, i32 1
; Extracts element 2 of a <vscale x 2 x i64>, the first i64 lane past the
; first 128 bits. Expected code: an indexed `mov z0.d, z0.d[2]` followed by
; `fmov x0, d0`.
108 define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
109 ; CHECK-LABEL: test_lane2_2xi64:
111 ; CHECK-NEXT: mov z0.d, z0.d[2]
112 ; CHECK-NEXT: fmov x0, d0
114 %b = extractelement <vscale x 2 x i64> %a, i32 2
; Extracts element 0 of a <vscale x 8 x half>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0;
; no instruction is listed.
118 define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
119 ; CHECK-LABEL: test_lane0_8xf16:
121 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
123 %b = extractelement <vscale x 8 x half> %a, i32 0
; Extracts element 7 of a <vscale x 8 x half>.
; Expected code: an indexed `mov z0.h, z0.h[7]`, then the `// kill:`
; annotation for $h0.
127 define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
128 ; CHECK-LABEL: test_lane7_8xf16:
130 ; CHECK-NEXT: mov z0.h, z0.h[7]
131 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
133 %b = extractelement <vscale x 8 x half> %a, i32 7
; Extracts element 8 of a <vscale x 8 x half>, the first half lane past the
; first 128 bits. Expected code: an indexed `mov z0.h, z0.h[8]`, then the
; `// kill:` annotation for $h0.
137 define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
138 ; CHECK-LABEL: test_lane8_8xf16:
140 ; CHECK-NEXT: mov z0.h, z0.h[8]
141 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
143 %b = extractelement <vscale x 8 x half> %a, i32 8
; Extracts element 0 of a <vscale x 4 x half>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0.
147 define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
148 ; CHECK-LABEL: test_lane0_4xf16:
150 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
152 %b = extractelement <vscale x 4 x half> %a, i32 0
; Extracts element 3 of a <vscale x 4 x half>.
; Expected code: an indexed `mov z0.s, z0.s[3]`. Note the checks address the
; vector with .s lanes although the element type is half.
156 define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
157 ; CHECK-LABEL: test_lane3_4xf16:
159 ; CHECK-NEXT: mov z0.s, z0.s[3]
160 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
162 %b = extractelement <vscale x 4 x half> %a, i32 3
; Extracts element 4 of a <vscale x 4 x half>.
; Expected code: an indexed `mov z0.s, z0.s[4]` (.s lanes, as in the lane-3
; variant of this test), then the `// kill:` annotation for $h0.
166 define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
167 ; CHECK-LABEL: test_lane4_4xf16:
169 ; CHECK-NEXT: mov z0.s, z0.s[4]
170 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
172 %b = extractelement <vscale x 4 x half> %a, i32 4
; Extracts element 0 of a <vscale x 2 x half>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0.
176 define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
177 ; CHECK-LABEL: test_lane0_2xf16:
179 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
181 %b = extractelement <vscale x 2 x half> %a, i32 0
; Extracts element 1 of a <vscale x 2 x half>.
; Expected code: an indexed `mov z0.d, z0.d[1]`. Note the checks address the
; vector with .d lanes although the element type is half.
185 define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
186 ; CHECK-LABEL: test_lane1_2xf16:
188 ; CHECK-NEXT: mov z0.d, z0.d[1]
189 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
191 %b = extractelement <vscale x 2 x half> %a, i32 1
; Extracts element 2 of a <vscale x 2 x half>.
; Expected code: an indexed `mov z0.d, z0.d[2]` (.d lanes), then the
; `// kill:` annotation for $h0.
195 define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
196 ; CHECK-LABEL: test_lane2_2xf16:
198 ; CHECK-NEXT: mov z0.d, z0.d[2]
199 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
201 %b = extractelement <vscale x 2 x half> %a, i32 2
; Extracts element 0 of a <vscale x 8 x bfloat>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0.
205 define bfloat @test_lane0_8xbf16(<vscale x 8 x bfloat> %a) #0 {
206 ; CHECK-LABEL: test_lane0_8xbf16:
208 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
210 %b = extractelement <vscale x 8 x bfloat> %a, i32 0
; Extracts element 7 of a <vscale x 8 x bfloat>.
; Expected code: an indexed `mov z0.h, z0.h[7]`, then the `// kill:`
; annotation for $h0.
214 define bfloat @test_lane7_8xbf16(<vscale x 8 x bfloat> %a) #0 {
215 ; CHECK-LABEL: test_lane7_8xbf16:
217 ; CHECK-NEXT: mov z0.h, z0.h[7]
218 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
220 %b = extractelement <vscale x 8 x bfloat> %a, i32 7
; Extracts element 8 of a <vscale x 8 x bfloat>, the first bfloat lane past
; the first 128 bits. Expected code: an indexed `mov z0.h, z0.h[8]`, then the
; `// kill:` annotation for $h0.
224 define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
225 ; CHECK-LABEL: test_lane8_8xbf16:
227 ; CHECK-NEXT: mov z0.h, z0.h[8]
228 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
230 %b = extractelement <vscale x 8 x bfloat> %a, i32 8
; Extracts element 0 of a <vscale x 4 x bfloat>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0.
234 define bfloat @test_lane0_4xbf16(<vscale x 4 x bfloat> %a) #0 {
235 ; CHECK-LABEL: test_lane0_4xbf16:
237 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
239 %b = extractelement <vscale x 4 x bfloat> %a, i32 0
; Extracts element 3 of a <vscale x 4 x bfloat>.
; Expected code: an indexed `mov z0.s, z0.s[3]`. Note the checks address the
; vector with .s lanes although the element type is bfloat.
243 define bfloat @test_lane3_4xbf16(<vscale x 4 x bfloat> %a) #0 {
244 ; CHECK-LABEL: test_lane3_4xbf16:
246 ; CHECK-NEXT: mov z0.s, z0.s[3]
247 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
249 %b = extractelement <vscale x 4 x bfloat> %a, i32 3
; Extracts element 4 of a <vscale x 4 x bfloat>.
; Expected code: an indexed `mov z0.s, z0.s[4]` (.s lanes), then the
; `// kill:` annotation for $h0.
253 define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
254 ; CHECK-LABEL: test_lane4_4xbf16:
256 ; CHECK-NEXT: mov z0.s, z0.s[4]
257 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
259 %b = extractelement <vscale x 4 x bfloat> %a, i32 4
; Extracts element 0 of a <vscale x 2 x bfloat>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $h0.
263 define bfloat @test_lane0_2xbf16(<vscale x 2 x bfloat> %a) #0 {
264 ; CHECK-LABEL: test_lane0_2xbf16:
266 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
268 %b = extractelement <vscale x 2 x bfloat> %a, i32 0
; Extracts element 1 of a <vscale x 2 x bfloat>.
; Expected code: an indexed `mov z0.d, z0.d[1]`. Note the checks address the
; vector with .d lanes although the element type is bfloat.
272 define bfloat @test_lane1_2xbf16(<vscale x 2 x bfloat> %a) #0 {
273 ; CHECK-LABEL: test_lane1_2xbf16:
275 ; CHECK-NEXT: mov z0.d, z0.d[1]
276 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
278 %b = extractelement <vscale x 2 x bfloat> %a, i32 1
; Extracts element 2 of a <vscale x 2 x bfloat>.
; Expected code: an indexed `mov z0.d, z0.d[2]` (.d lanes), then the
; `// kill:` annotation for $h0.
282 define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
283 ; CHECK-LABEL: test_lane2_2xbf16:
285 ; CHECK-NEXT: mov z0.d, z0.d[2]
286 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
288 %b = extractelement <vscale x 2 x bfloat> %a, i32 2
; Extracts element 0 of a <vscale x 4 x float>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $s0.
292 define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
293 ; CHECK-LABEL: test_lane0_4xf32:
295 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
297 %b = extractelement <vscale x 4 x float> %a, i32 0
; Extracts element 3 of a <vscale x 4 x float>.
; Expected code: an indexed `mov z0.s, z0.s[3]`, then the `// kill:`
; annotation for $s0.
301 define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
302 ; CHECK-LABEL: test_lane3_4xf32:
304 ; CHECK-NEXT: mov z0.s, z0.s[3]
305 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
307 %b = extractelement <vscale x 4 x float> %a, i32 3
; Extracts element 4 of a <vscale x 4 x float>, the first float lane past the
; first 128 bits. Expected code: an indexed `mov z0.s, z0.s[4]`, then the
; `// kill:` annotation for $s0.
311 define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
312 ; CHECK-LABEL: test_lane4_4xf32:
314 ; CHECK-NEXT: mov z0.s, z0.s[4]
315 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
317 %b = extractelement <vscale x 4 x float> %a, i32 4
; Extracts element 0 of a <vscale x 2 x float>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $s0.
321 define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
322 ; CHECK-LABEL: test_lane0_2xf32:
324 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
326 %b = extractelement <vscale x 2 x float> %a, i32 0
; Extracts element 1 of a <vscale x 2 x float>.
; Expected code: an indexed `mov z0.d, z0.d[1]`. Note the checks address the
; vector with .d lanes although the element type is float.
330 define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
331 ; CHECK-LABEL: test_lane1_2xf32:
333 ; CHECK-NEXT: mov z0.d, z0.d[1]
334 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
336 %b = extractelement <vscale x 2 x float> %a, i32 1
; Extracts element 2 of a <vscale x 2 x float>.
; Expected code: an indexed `mov z0.d, z0.d[2]` (.d lanes), then the
; `// kill:` annotation for $s0.
340 define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
341 ; CHECK-LABEL: test_lane2_2xf32:
343 ; CHECK-NEXT: mov z0.d, z0.d[2]
344 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
346 %b = extractelement <vscale x 2 x float> %a, i32 2
; Extracts element 0 of a <vscale x 2 x double>.
; The only CHECK-NEXT line visible here is the `// kill:` annotation for $d0.
350 define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
351 ; CHECK-LABEL: test_lane0_2xf64:
353 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
355 %b = extractelement <vscale x 2 x double> %a, i32 0
; Extracts element 1 of a <vscale x 2 x double>.
; Expected code: an indexed `mov z0.d, z0.d[1]`, then the `// kill:`
; annotation for $d0.
359 define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
360 ; CHECK-LABEL: test_lane1_2xf64:
362 ; CHECK-NEXT: mov z0.d, z0.d[1]
363 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
365 %b = extractelement <vscale x 2 x double> %a, i32 1
; Extracts element 2 of a <vscale x 2 x double>, the first double lane past
; the first 128 bits. Expected code: an indexed `mov z0.d, z0.d[2]`, then the
; `// kill:` annotation for $d0.
369 define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
370 ; CHECK-LABEL: test_lane2_2xf64:
372 ; CHECK-NEXT: mov z0.d, z0.d[2]
373 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
375 %b = extractelement <vscale x 2 x double> %a, i32 2
; Extracts an element of a <vscale x 16 x i8> at the run-time index %x.
; Expected code: copy the index with `mov w8, w0`, build a predicate with
; `whilels p0.b, xzr, x8`, then read the element with `lastb w0, p0, z0.b`.
379 define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
380 ; CHECK-LABEL: test_lanex_16xi8:
382 ; CHECK-NEXT: mov w8, w0
383 ; CHECK-NEXT: whilels p0.b, xzr, x8
384 ; CHECK-NEXT: lastb w0, p0, z0.b
386 %b = extractelement <vscale x 16 x i8> %a, i32 %x
; Extracts an element of a <vscale x 8 x i16> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.h, xzr, x8`, then
; `lastb w0, p0, z0.h`.
390 define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
391 ; CHECK-LABEL: test_lanex_8xi16:
393 ; CHECK-NEXT: mov w8, w0
394 ; CHECK-NEXT: whilels p0.h, xzr, x8
395 ; CHECK-NEXT: lastb w0, p0, z0.h
397 %b = extractelement <vscale x 8 x i16> %a, i32 %x
; Extracts an element of a <vscale x 4 x i32> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.s, xzr, x8`, then
; `lastb w0, p0, z0.s`.
401 define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
402 ; CHECK-LABEL: test_lanex_4xi32:
404 ; CHECK-NEXT: mov w8, w0
405 ; CHECK-NEXT: whilels p0.s, xzr, x8
406 ; CHECK-NEXT: lastb w0, p0, z0.s
408 %b = extractelement <vscale x 4 x i32> %a, i32 %x
; Extracts an element of a <vscale x 2 x i64> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.d, xzr, x8`, then
; `lastb x0, p0, z0.d` (64-bit destination register).
412 define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
413 ; CHECK-LABEL: test_lanex_2xi64:
415 ; CHECK-NEXT: mov w8, w0
416 ; CHECK-NEXT: whilels p0.d, xzr, x8
417 ; CHECK-NEXT: lastb x0, p0, z0.d
419 %b = extractelement <vscale x 2 x i64> %a, i32 %x
; Extracts an element of a <vscale x 8 x half> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.h, xzr, x8`, then
; `lastb h0, p0, z0.h`, which writes the result to h0.
423 define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
424 ; CHECK-LABEL: test_lanex_8xf16:
426 ; CHECK-NEXT: mov w8, w0
427 ; CHECK-NEXT: whilels p0.h, xzr, x8
428 ; CHECK-NEXT: lastb h0, p0, z0.h
430 %b = extractelement <vscale x 8 x half> %a, i32 %x
; Extracts an element of a <vscale x 4 x half> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.s, xzr, x8`, then
; `lastb h0, p0, z0.h`. Note the predicate is built with .s elements while
; the lastb reads .h elements.
434 define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
435 ; CHECK-LABEL: test_lanex_4xf16:
437 ; CHECK-NEXT: mov w8, w0
438 ; CHECK-NEXT: whilels p0.s, xzr, x8
439 ; CHECK-NEXT: lastb h0, p0, z0.h
441 %b = extractelement <vscale x 4 x half> %a, i32 %x
; Extracts an element of a <vscale x 2 x half> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.d, xzr, x8`, then
; `lastb h0, p0, z0.h`. Note the predicate is built with .d elements while
; the lastb reads .h elements.
445 define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
446 ; CHECK-LABEL: test_lanex_2xf16:
448 ; CHECK-NEXT: mov w8, w0
449 ; CHECK-NEXT: whilels p0.d, xzr, x8
450 ; CHECK-NEXT: lastb h0, p0, z0.h
452 %b = extractelement <vscale x 2 x half> %a, i32 %x
; Extracts an element of a <vscale x 8 x bfloat> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.h, xzr, x8`, then
; `lastb h0, p0, z0.h`.
456 define bfloat @test_lanex_8xbf16(<vscale x 8 x bfloat> %a, i32 %x) #0 {
457 ; CHECK-LABEL: test_lanex_8xbf16:
459 ; CHECK-NEXT: mov w8, w0
460 ; CHECK-NEXT: whilels p0.h, xzr, x8
461 ; CHECK-NEXT: lastb h0, p0, z0.h
463 %b = extractelement <vscale x 8 x bfloat> %a, i32 %x
; Extracts an element of a <vscale x 4 x bfloat> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.s, xzr, x8`, then
; `lastb h0, p0, z0.h`. Note the predicate is built with .s elements while
; the lastb reads .h elements.
467 define bfloat @test_lanex_4xbf16(<vscale x 4 x bfloat> %a, i32 %x) #0 {
468 ; CHECK-LABEL: test_lanex_4xbf16:
470 ; CHECK-NEXT: mov w8, w0
471 ; CHECK-NEXT: whilels p0.s, xzr, x8
472 ; CHECK-NEXT: lastb h0, p0, z0.h
474 %b = extractelement <vscale x 4 x bfloat> %a, i32 %x
; Extracts an element of a <vscale x 2 x bfloat> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.d, xzr, x8`, then
; `lastb h0, p0, z0.h`. Note the predicate is built with .d elements while
; the lastb reads .h elements.
478 define bfloat @test_lanex_2xbf16(<vscale x 2 x bfloat> %a, i32 %x) #0 {
479 ; CHECK-LABEL: test_lanex_2xbf16:
481 ; CHECK-NEXT: mov w8, w0
482 ; CHECK-NEXT: whilels p0.d, xzr, x8
483 ; CHECK-NEXT: lastb h0, p0, z0.h
485 %b = extractelement <vscale x 2 x bfloat> %a, i32 %x
; Extracts an element of a <vscale x 4 x float> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.s, xzr, x8`, then
; `lastb s0, p0, z0.s`.
489 define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
490 ; CHECK-LABEL: test_lanex_4xf32:
492 ; CHECK-NEXT: mov w8, w0
493 ; CHECK-NEXT: whilels p0.s, xzr, x8
494 ; CHECK-NEXT: lastb s0, p0, z0.s
496 %b = extractelement <vscale x 4 x float> %a, i32 %x
; Extracts an element of a <vscale x 2 x float> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.d, xzr, x8`, then
; `lastb s0, p0, z0.s`. Note the predicate is built with .d elements while
; the lastb reads .s elements.
500 define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
501 ; CHECK-LABEL: test_lanex_2xf32:
503 ; CHECK-NEXT: mov w8, w0
504 ; CHECK-NEXT: whilels p0.d, xzr, x8
505 ; CHECK-NEXT: lastb s0, p0, z0.s
507 %b = extractelement <vscale x 2 x float> %a, i32 %x
; Extracts an element of a <vscale x 2 x double> at the run-time index %x.
; Expected code: `mov w8, w0`, `whilels p0.d, xzr, x8`, then
; `lastb d0, p0, z0.d`.
511 define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
512 ; CHECK-LABEL: test_lanex_2xf64:
514 ; CHECK-NEXT: mov w8, w0
515 ; CHECK-NEXT: whilels p0.d, xzr, x8
516 ; CHECK-NEXT: lastb d0, p0, z0.d
518 %b = extractelement <vscale x 2 x double> %a, i32 %x
522 ; Deliberately choose an index that is undefined
; Extracts from a <vscale x 4 x i32> using `undef` as the index operand.
; Expected code: a single `fmov w0, s0`.
523 define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
524 ; CHECK-LABEL: test_undef_lane_4xi32:
526 ; CHECK-NEXT: fmov w0, s0
528 %b = extractelement <vscale x 4 x i32> %a, i32 undef
; Inserts %a into lane 0 of an undef <vscale x 16 x i8> and immediately
; extracts lane 0 again. Only the CHECK-LABEL line is visible in this view.
; NOTE(review): presumably the pair folds away so no instruction is needed
; before the return - confirm against the full check lines.
532 define i8 @extract_of_insert_undef_16xi8(i8 %a) #0 {
533 ; CHECK-LABEL: extract_of_insert_undef_16xi8:
536 %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
537 %c = extractelement <vscale x 16 x i8> %b, i32 0
; Inserts %b into lane 0 of the vector argument %a and immediately extracts
; lane 0 again. Only the CHECK-LABEL line is visible in this view.
; NOTE(review): presumably the result folds to %b - confirm against the full
; check lines.
541 define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
542 ; CHECK-LABEL: extract0_of_insert0_16xi8:
545 %c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
546 %d = extractelement <vscale x 16 x i8> %c, i32 0
; Inserts %b into lane 64 of the vector argument %a and immediately extracts
; lane 64 again (same constant index for both operations). Only the
; CHECK-LABEL line is visible in this view.
; NOTE(review): presumably the result folds to %b - confirm against the full
; check lines.
550 define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
551 ; CHECK-LABEL: extract64_of_insert64_16xi8:
554 %c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 64
555 %d = extractelement <vscale x 16 x i8> %c, i32 64
; Inserts %b into lane 0 of %a but extracts lane 3, so the inserted value is
; not the one read back. Expected code: `umov w0, v0.b[3]`; no insert
; instruction appears in the visible checks.
559 define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
560 ; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8:
562 ; CHECK-NEXT: umov w0, v0.b[3]
564 %c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
565 %d = extractelement <vscale x 16 x i8> %c, i32 3
; Extracts element 0 of a zeroinitializer <vscale x 16 x i8>; the %a argument
; is not used by the extract. Expected code: `mov w0, wzr`.
569 define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) #0 {
570 ; CHECK-LABEL: test_lane0_zero_16xi8:
572 ; CHECK-NEXT: mov w0, wzr
574 %b = extractelement <vscale x 16 x i8> zeroinitializer, i32 0
578 ; The DAG combiner should fold the extract of a splat to give element zero
579 ; of the splat, i.e. %x. If the index is beyond the end of the scalable
580 ; vector the result is undefined anyway.
; Builds a splat of %x (insertelement into lane 0, then shufflevector with a
; zeroinitializer mask) and extracts from it at the run-time index %y.
; Only the CHECK-LABEL line is visible in this view.
581 define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) #0 {
582 ; CHECK-LABEL: test_lanex_splat_2xi64:
585 %a = insertelement <vscale x 2 x i64> undef, i64 %x, i32 0
586 %b = shufflevector <vscale x 2 x i64> %a, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
587 %c = extractelement <vscale x 2 x i64> %b, i32 %y
; Extracts element 0 of a <vscale x 16 x i1> predicate.
; Expected code: `mov z0.b, p0/z, #1` to turn the predicate into a vector in
; z0, then `fmov w8, s0` and `and w0, w8, #0x1` to keep only bit 0.
591 define i1 @test_lane0_16xi1(<vscale x 16 x i1> %a) #0 {
592 ; CHECK-LABEL: test_lane0_16xi1:
594 ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
595 ; CHECK-NEXT: fmov w8, s0
596 ; CHECK-NEXT: and w0, w8, #0x1
598 %b = extractelement <vscale x 16 x i1> %a, i32 0
; Extracts element 9 of a <vscale x 8 x i1> predicate.
; Expected code: `mov z0.h, p0/z, #1` to turn the predicate into a vector,
; an indexed `mov z0.h, z0.h[9]`, then `fmov w8, s0` and `and w0, w8, #0x1`.
602 define i1 @test_lane9_8xi1(<vscale x 8 x i1> %a) #0 {
603 ; CHECK-LABEL: test_lane9_8xi1:
605 ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
606 ; CHECK-NEXT: mov z0.h, z0.h[9]
607 ; CHECK-NEXT: fmov w8, s0
608 ; CHECK-NEXT: and w0, w8, #0x1
610 %b = extractelement <vscale x 8 x i1> %a, i32 9
; Extracts the element of a <vscale x 8 x i1> at index (vscale << 3) - 1,
; i.e. vscale * 8 - 1, computed at run time from @llvm.vscale.i64.
; Expected code: `mov x8, #-1` as the index register, `mov z0.h, p0/z, #1`
; to turn the predicate into a vector, `whilels p0.h, xzr, x8`,
; `lastb w8, p0, z0.h`, then `and w0, w8, #0x1`. No vscale computation
; appears in the visible checks.
614 define i1 @test_last_8xi1(<vscale x 8 x i1> %a) #0 {
615 ; CHECK-LABEL: test_last_8xi1:
617 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
618 ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
619 ; CHECK-NEXT: whilels p0.h, xzr, x8
620 ; CHECK-NEXT: lastb w8, p0, z0.h
621 ; CHECK-NEXT: and w0, w8, #0x1
623 %vscale = call i64 @llvm.vscale.i64()
624 %shl = shl nuw nsw i64 %vscale, 3
625 %idx = add nuw nsw i64 %shl, -1
626 %bit = extractelement <vscale x 8 x i1> %a, i64 %idx
; Extracts an element of a <vscale x 4 x i1> predicate at the run-time
; index %x. Expected code: `mov z0.s, p0/z, #1` to turn the predicate into a
; vector, `mov w8, w0`, `whilels p0.s, xzr, x8`, `lastb w8, p0, z0.s`, then
; `and w0, w8, #0x1`.
630 define i1 @test_lanex_4xi1(<vscale x 4 x i1> %a, i32 %x) #0 {
631 ; CHECK-LABEL: test_lanex_4xi1:
633 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
634 ; CHECK-NEXT: mov w8, w0
635 ; CHECK-NEXT: whilels p0.s, xzr, x8
636 ; CHECK-NEXT: lastb w8, p0, z0.s
637 ; CHECK-NEXT: and w0, w8, #0x1
639 %b = extractelement <vscale x 4 x i1> %a, i32 %x
; Extracts element 4 of a <vscale x 2 x i1> predicate.
; Expected code: `mov z0.d, p0/z, #1` to turn the predicate into a vector,
; an indexed `mov z0.d, z0.d[4]`, then `fmov x8, d0` and `and w0, w8, #0x1`.
643 define i1 @test_lane4_2xi1(<vscale x 2 x i1> %a) #0 {
644 ; CHECK-LABEL: test_lane4_2xi1:
646 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
647 ; CHECK-NEXT: mov z0.d, z0.d[4]
648 ; CHECK-NEXT: fmov x8, d0
649 ; CHECK-NEXT: and w0, w8, #0x1
651 %b = extractelement <vscale x 2 x i1> %a, i32 4
; Declaration of the vscale intrinsic called by test_last_8xi1.
655 declare i64 @llvm.vscale.i64()
; Attribute group #0, referenced by the function definitions in this file:
; enables the sve and bf16 target features.
657 attributes #0 = { "target-features"="+sve,+bf16" }