llvm/test/CodeGen/AArch64/concat_vector-scalar-combine.ll

   1 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
   2
   3 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; little-endian, Mach-O mangling, i64/i128 naturally aligned, native ints 32/64, 128-bit stack alignment
   4
   5 ; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
   6
   7 define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
     ; One 16-bit scalar, viewed as <2 x i8>, is repeated four times to fill an
     ; <8 x i8> result. The expected output is a single dup of w0 into four
     ; halfword lanes, followed by the return.
   8 entry:
   9 ; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
  10 ; CHECK-NEXT: dup.4h v0, w0
  11 ; CHECK-NEXT: ret
       ; Keep the low 16 bits of %x and reinterpret them as two bytes.
  12   %t = trunc i32 %x to i16
  13   %0 = bitcast i16 %t to <2 x i8>
       ; Mask 0,1 repeated four times: every output byte pair copies the input pair.
  14   %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  15   ret <8 x i8> %1
  16 }
  17
  18 define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
     ; One 32-bit scalar, viewed as <4 x i8>, is repeated twice to fill an
     ; <8 x i8> result. The expected output is a single dup of w0 into two
     ; word lanes, followed by the return.
  19 entry:
  20 ; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
  21 ; CHECK-NEXT: dup.2s v0, w0
  22 ; CHECK-NEXT: ret
       ; No truncation here: all 32 bits of %x are reinterpreted as four bytes.
  23   %0 = bitcast i32 %x to <4 x i8>
       ; Mask 0..3 twice: low and high halves of the result both copy the input.
  24   %1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  25   ret <8 x i8> %1
  26 }
  27
  28 define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
     ; One 32-bit scalar, viewed as <2 x i16>, is spread across a 128-bit
     ; <8 x i16> result. The expected output is a single dup of w0 into four
     ; word lanes, followed by the return.
  29 entry:
  30 ; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
  31 ; CHECK-NEXT: dup.4s v0, w0
  32 ; CHECK-NEXT: ret
  33   %0 = bitcast i32 %x to <2 x i16>
       ; Mask indices 0 and 1 pick the two halves of %x. Indices 2 and above
       ; address the second operand, which is undef here, so output elements
       ; 2 and 3 are unconstrained and the whole result can still be one splat.
  34   %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  35   ret <8 x i16> %1
  36 }
  37
  38 define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
     ; Two 16-bit scalars, each viewed as <2 x i8>, are interleaved into an
     ; <8 x i8> result as: x pair, y pair, undef pair, y pair. The expected
     ; output moves w0 into the vector register and then inserts w1 into
     ; halfword lanes 1 and 3; lane 2 (the undef pair) gets no insert.
  39 entry:
  40 ; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
  41 ; CHECK-NEXT: fmov s0, w0
  42 ; CHECK-NEXT: mov.h v0[1], w1
  43 ; CHECK-NEXT: mov.h v0[3], w1
  44 ; CHECK-NEXT: ret
       ; Narrow both arguments to 16 bits, then reinterpret each as two bytes.
  45   %tx = trunc i32 %x to i16
  46   %ty = trunc i32 %y to i16
  47   %bx = bitcast i16 %tx to <2 x i8>
  48   %by = bitcast i16 %ty to <2 x i8>
       ; Mask indices 0-1 select from %bx and 2-3 from %by; the undef entries
       ; leave output bytes 4 and 5 unconstrained.
  49   %r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
  50   ret <8 x i8> %r
  51 }
  52
  53 define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
     ; Two 32-bit scalars, each viewed as <4 x i8>, are concatenated in swapped
     ; order (y first, then x) into an <8 x i8> result. The expected output
     ; moves w1 into the vector register and inserts w0 into word lane 1.
  54 entry:
  55 ; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
  56 ; CHECK-NEXT: fmov s0, w1
  57 ; CHECK-NEXT: mov.s v0[1], w0
  58 ; CHECK-NEXT: ret
  59   %bx = bitcast i32 %x to <4 x i8>
  60   %by = bitcast i32 %y to <4 x i8>
       ; Mask indices 4-7 select the four bytes of %by and 0-3 those of %bx,
       ; which is why the second argument lands in the low half.
  61   %r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  62   ret <8 x i8> %r
  63 }
  64
  65 define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
     ; Two 32-bit scalars, each viewed as <2 x i16>, fill a 128-bit <8 x i16>
     ; result in the order x, y, y, x. The expected output moves w0 into the
     ; vector register and then inserts w1, w1 and w0 into word lanes 1, 2, 3.
  66 entry:
  67 ; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
  68 ; CHECK-NEXT: fmov s0, w0
  69 ; CHECK-NEXT: mov.s v0[1], w1
  70 ; CHECK-NEXT: mov.s v0[2], w1
  71 ; CHECK-NEXT: mov.s v0[3], w0
  72 ; CHECK-NEXT: ret
  73   %bx = bitcast i32 %x to <2 x i16>
  74   %by = bitcast i32 %y to <2 x i16>
       ; Mask indices 0-1 select %bx and 2-3 select %by, giving the pair order
       ; x, y, y, x that the lane inserts above reproduce.
  75   %r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
  76   ret <8 x i16> %r
  77 }
  78
  79 ; Also make sure we minimize bitcasts.
  80
  81 ; This is a pretty artificial testcase: make sure we bitcast to floating-point
  82 ; if any of the scalars is floating-point.
  83 define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
     ; Mixes an integer scalar (%x) with a floating-point scalar (%y), both
     ; viewed as <2 x i8>, in the order x, y, x, y. The expected output first
     ; moves w0 into a vector register (its number is captured as X by the
     ; [[X:[0-9]+]] pattern and reused below), copies that register to v0, and
     ; then fills halfword lanes 1-3 with vector-to-vector lane moves.
     ; %dummy is never read in the body.
     ; NOTE(review): %dummy presumably exists to occupy the first FP argument
     ; register so that %y arrives in v1, as the expected lines assume - confirm.
  84 entry:
  85 ; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
  86 ; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
  87 ; CHECK-NEXT: mov.16b v0, v[[X]]
  88 ; CHECK-NEXT: mov.h v0[1], v1[0]
  89 ; CHECK-NEXT: mov.h v0[2], v[[X]][0]
  90 ; CHECK-NEXT: mov.h v0[3], v1[0]
  91 ; CHECK-NEXT: ret
       ; Integer side: keep the low 16 bits of %x and view them as two bytes.
  92   %t = trunc i32 %x to i16
  93   %0 = bitcast i16 %t to <2 x i8>
       ; Floating-point side: the 16-bit half is reinterpreted directly.
  94   %y0 = bitcast half %y to <2 x i8>
       ; Mask 0..3 twice: the x pair and y pair appear in both result halves.
  95   %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  96   ret <8 x i8> %1
  97 }
  98
  99 define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
     ; Both scalars are floating-point (half), each viewed as <2 x i8> and laid
     ; out as x, y, x, y; the result is returned as <2 x float>. The expected
     ; output uses only vector-register moves: copy v1 to v0, then insert lane 0
     ; of v2, v1 and v2 into halfword lanes 1, 2 and 3.
     ; %dummy is never read in the body.
     ; NOTE(review): %dummy presumably exists to push %x and %y into v1 and v2,
     ; which is what the expected lines reference - confirm.
 100 entry:
 101 ; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
 102 ; CHECK-NEXT: mov.16b v0, v1
 103 ; CHECK-NEXT: mov.h v0[1], v2[0]
 104 ; CHECK-NEXT: mov.h v0[2], v1[0]
 105 ; CHECK-NEXT: mov.h v0[3], v2[0]
 106 ; CHECK-NEXT: ret
 107   %0 = bitcast half %x to <2 x i8>
 108   %y0 = bitcast half %y to <2 x i8>
       ; Mask 0..3 twice: the x pair and y pair appear in both result halves.
 109   %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
       ; Reinterpret the eight bytes as two floats for the return value.
 110   %2 = bitcast <8 x i8> %1 to <2 x float>
 111   ret <2 x float> %2
 112 }
 113
 114 define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
     ; Floating-point counterpart of the integer <2 x i16> splat: a float,
     ; viewed as <2 x i16>, is spread across a 128-bit vector and returned as
     ; <4 x float>. The expected output is a single dup from lane 0 of v0 into
     ; four word lanes, followed by the return.
     ; NOTE(review): the name says v16i8, but the shuffle below produces
     ; <8 x i16>; the name is left alone because the label line matches on it.
 115 entry:
 116 ; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
 117 ; CHECK-NEXT: dup.4s v0, v0[0]
 118 ; CHECK-NEXT: ret
 119   %0 = bitcast float %x to <2 x i16>
       ; Mask indices 0 and 1 pick the two halves of %x. Indices 2 and above
       ; address the undef second operand, so output elements 2 and 3 are
       ; unconstrained and the whole result can still be one splat.
 120   %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
 121   %2 = bitcast <8 x i16> %1 to <4 x float>
 122   ret <4 x float> %2
 123 }
 124
 125 attributes #0 = { nounwind } ; attribute group referenced as #0 by each define above