llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple arm64-- | FileCheck %s
   3
   4 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; little-endian, native i32/i64, 128-bit stack alignment. NOTE(review): m:o selects Mach-O mangling while the RUN triple is arm64-- and the expected asm uses '//' comments; confirm this is intentional
   5
   6 ; Test the (concat_vectors (trunc), (trunc)) pattern.
   7
   8 define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 { ; two v2i64 inputs, each narrowed to v2i16, joined into one v4i16
   9 ; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
  10 ; CHECK:       // %bb.0: // %entry
  11 ; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
  12 ; CHECK-NEXT:    xtn v0.4h, v0.4s
  13 ; CHECK-NEXT:    ret
  14 entry: ; expected lowering: one uzp1 on .4s lanes, then an xtn down to .4h
  15   %at = trunc <2 x i64> %a to <2 x i16> ; keep only the low 16 bits of each i64 lane of %a
  16   %bt = trunc <2 x i64> %b to <2 x i16> ; same narrowing for %b
  17   %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both inputs: all of %at followed by all of %bt
  18   ret <4 x i16> %shuffle
  19 }
  20
  21 define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 { ; two v2i64 inputs, each narrowed to v2i32, joined into one v4i32
  22 ; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32:
  23 ; CHECK:       // %bb.0: // %entry
  24 ; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
  25 ; CHECK-NEXT:    ret
  26 entry: ; expected lowering: a single uzp1 on .4s lanes, no separate narrowing instruction
  27   %at = trunc <2 x i64> %a to <2 x i32> ; keep only the low 32 bits of each i64 lane of %a
  28   %bt = trunc <2 x i64> %b to <2 x i32> ; same narrowing for %b
  29   %shuffle = shufflevector <2 x i32> %at, <2 x i32> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both inputs: all of %at followed by all of %bt
  30   ret <4 x i32> %shuffle
  31 }
  32
  33 define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 { ; two v2i32 inputs, each narrowed to v2i16, joined into one v4i16
  34 ; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16:
  35 ; CHECK:       // %bb.0: // %entry
  36 ; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
  37 ; CHECK-NEXT:    ret
  38 entry: ; expected lowering: a single uzp1 on .4h lanes
  39   %at = trunc <2 x i32> %a to <2 x i16> ; keep only the low 16 bits of each i32 lane of %a
  40   %bt = trunc <2 x i32> %b to <2 x i16> ; same narrowing for %b
  41   %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both inputs: all of %at followed by all of %bt
  42   ret <4 x i16> %shuffle
  43 }
  44
  45 define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 { ; two v4i32 inputs, each narrowed to v4i8, joined into one v8i8
  46 ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
  47 ; CHECK:       // %bb.0: // %entry
  48 ; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
  49 ; CHECK-NEXT:    xtn v0.8b, v0.8h
  50 ; CHECK-NEXT:    ret
  51 entry: ; expected lowering: one uzp1 on .8h lanes, then an xtn down to .8b
  52   %at = trunc <4 x i32> %a to <4 x i8> ; keep only the low 8 bits of each i32 lane of %a
  53   %bt = trunc <4 x i32> %b to <4 x i8> ; same narrowing for %b
  54   %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask over both inputs: all of %at followed by all of %bt
  55   ret <8 x i8> %shuffle
  56 }
  57
  58 define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 { ; two v4i32 inputs, each narrowed to v4i16, joined into one v8i16
  59 ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
  60 ; CHECK:       // %bb.0: // %entry
  61 ; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
  62 ; CHECK-NEXT:    ret
  63 entry: ; expected lowering: a single uzp1 on .8h lanes, no separate narrowing instruction
  64   %at = trunc <4 x i32> %a to <4 x i16> ; keep only the low 16 bits of each i32 lane of %a
  65   %bt = trunc <4 x i32> %b to <4 x i16> ; same narrowing for %b
  66   %shuffle = shufflevector <4 x i16> %at, <4 x i16> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask over both inputs: all of %at followed by all of %bt
  67   ret <8 x i16> %shuffle
  68 }
  69
  70 define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 { ; two v4i16 inputs, each narrowed to v4i8, joined into one v8i8
  71 ; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8:
  72 ; CHECK:       // %bb.0: // %entry
  73 ; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
  74 ; CHECK-NEXT:    ret
  75 entry: ; expected lowering: a single uzp1 on .8b lanes
  76   %at = trunc <4 x i16> %a to <4 x i8> ; keep only the low 8 bits of each i16 lane of %a
  77   %bt = trunc <4 x i16> %b to <4 x i8> ; same narrowing for %b
  78   %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask over both inputs: all of %at followed by all of %bt
  79   ret <8 x i8> %shuffle
  80 }
  81
  82 define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 { ; two v8i16 inputs, each narrowed to v8i8, joined into one v16i8
  83 ; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8:
  84 ; CHECK:       // %bb.0: // %entry
  85 ; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
  86 ; CHECK-NEXT:    ret
  87 entry: ; expected lowering: a single uzp1 on .16b lanes
  88   %at = trunc <8 x i16> %a to <8 x i8> ; keep only the low 8 bits of each i16 lane of %a
  89   %bt = trunc <8 x i16> %b to <8 x i8> ; same narrowing for %b
  90   %shuffle = shufflevector <8 x i8> %at, <8 x i8> %bt, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32  9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask over both inputs: all of %at followed by all of %bt
  91   ret <16 x i8> %shuffle
  92 }
  93
  94 ; The concat_vectors operation in this test is introduced when splitting
  95 ; the fptrunc operation due to the split <vscale x 4 x double> input operand.
  96 define void @test_concat_fptrunc_v4f64_to_v4f32(ptr %ptr) #1 { ; note: despite the fixed-width name, the operands below are scalable <vscale x 4 x ...> vectors
  97 ; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
  98 ; CHECK:       // %bb.0: // %entry
  99 ; CHECK-NEXT:    fmov z0.s, #1.00000000
 100 ; CHECK-NEXT:    ptrue p0.s
 101 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 102 ; CHECK-NEXT:    ret
 103 entry: ; expected lowering: the splat is materialised directly in .s lanes and stored; no convert instruction is expected
 104   %0 = shufflevector <vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0, giving a splat of double 1.0
 105   %1 = fptrunc <vscale x 4 x double> %0 to <vscale x 4 x float> ; narrow every lane from double to float
 106   store <vscale x 4 x float> %1, ptr %ptr, align 4 ; write the narrowed vector to the caller-supplied pointer
 107   ret void
 108 }
 109
 110 attributes #0 = { nounwind } ; attached to every integer trunc test in this file
 111 attributes #1 = { "target-features"="+sve" } ; attached to the scalable-vector fptrunc test only