test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - | FileCheck %s
   3
   4 define void @test(i32* %p1, i32* %p2) {
   5 ; CHECK-LABEL: test:
   6 ; CHECK:       // %bb.0:
   7 ; CHECK-NEXT:    mov w8, #3
   8 ; CHECK-NEXT:    mov w9, #1
   9 ; CHECK-NEXT:    str w8, [x0]
  10 ; CHECK-NEXT:    str w9, [x1]
  11 ; CHECK-NEXT:    ret
       ; Every input to the shuffle chain below is a constant or undef, so both
       ; stored values are known at compile time. The expected output above is
       ; just two immediate moves (3 and 1) followed by the two stores.
       ;
       ; %tmp = <1, undef, undef>
  12   %tmp = shufflevector <1 x i32> <i32 1>, <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
       ; Mask index 3 selects element 0 of the second operand (%tmp),
       ; so %tmp2 = <2, 3, 1>.
  13   %tmp2 = shufflevector <3 x i32> <i32 2, i32 3, i32 4>, <3 x i32> %tmp, <3 x i32> <i32 0, i32 1, i32 3>
       ; Widen to six elements: %tmp3 = <2, 3, 1, undef, undef, undef>.
  14   %tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
       ; Mask indices 0..5 read the undef first operand; indices 6..8 read
       ; elements 0..2 of %tmp3, so elements 6..8 of %tmp4 are <2, 3, 1>.
  15   %tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
       ; Element 7 is the constant 3 and element 8 is the constant 1.
  16   %tmp6 = extractelement <9 x i32> %tmp4, i32 7
  17   %tmp8 = extractelement <9 x i32> %tmp4, i32 8
  18   store i32 %tmp6, i32* %p1, align 4
  19   store i32 %tmp8, i32* %p2, align 4
  20   ret void
  21 }
  22
  23 ; Test case from PR41951
     ; Shuffles two 3-element vectors into a 4-element result. Mask indices 0..2
     ; select from the first operand (%y) and 3..5 from the second (%x), so the
     ; returned vector is <y[1], x[1], x[0], y[0]>. The expected output builds
     ; it from two zip1 instructions combined with one ext.
  24 define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
  25 ; CHECK-LABEL: widen_shuffles_reduced:
  26 ; CHECK:       // %bb.0:
  27 ; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
  28 ; CHECK-NEXT:    zip1 v0.4s, v1.4s, v0.4s
  29 ; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
  30 ; CHECK-NEXT:    ret
  31   %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
  32   ret <4 x i32> %s3
  33 }
  34
  35 define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
  36 ; CHECK-LABEL: zip_mask_check:
  37 ; CHECK:       // %bb.0:
  38 ; CHECK-NEXT:    ldr q0, [x0]
  39 ; CHECK-NEXT:    ldr d1, [x1]
  40 ; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
  41 ; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
  42 ; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
  43 ; CHECK-NEXT:    str s0, [x2]
  44 ; CHECK-NEXT:    ret
       ; Exercises a chain of shuffles whose masks are mostly undef: only lanes
       ; 0 and 1 of the shuffled vector are defined (%tmp3[1] and %tmp4[1]).
       ; The expected output above forms them with a single trn2 of the two
       ; loaded vectors, runs two fmla instructions, and stores lane 0 to %p3.
  45   %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
  46   %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
       ; Mask index 1 is %tmp3[1]; index 4 is element 1 of the second operand,
       ; i.e. %tmp4[1]. %tmp5 = <%tmp3[1], %tmp4[1], undef, undef>.
  47   %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
       ; Index 5 reads the undef second operand, so lane 2 is undef here too.
  48   %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
       ; Lanes 0..2 are copied from %tmp6; index 5 again reads the undef operand.
  49   %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
       ; Two chained fma calls; every operand other than the chained value is undef.
  50   %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
  51   %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
       ; Widen to 16 lanes, keeping only lane 0 of %tmp9; all other lanes are undef.
  52   %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
       ; Reinterpret the float lanes as i32 and store the bits of lane 0 to %p3.
  53   %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
  54   %tmp12 = extractelement <16 x i32> %tmp11, i32 0
  55   store i32 %tmp12, i32* %p3, align 4
  56   ret void
  57 }
  58
  59 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1