; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
; Fold (bitcast (vector.insert undef, (load <2 x i8>)) to <vscale x 16 x i1>)
; into a direct predicate load. With vscale_range(1,1) a predicate register is
; exactly 2 bytes, so the <2 x i8> load covers the whole register.
define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <2 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Same fold at vscale_range(2,2): predicate register is 4 bytes, matching the
; <4 x i8> fixed-width load.
define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Same fold at vscale_range(4,4): predicate register is 8 bytes, matching the
; <8 x i8> fixed-width load.
define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <8 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Ensure the insertion point is at the load: the load and the vector.insert
; live in different basic blocks, and the replacement predicate load must be
; created next to the original load, not next to the insert.
; NOTE(review): the entry:/bb1: labels, the br, and two CHECK lines were lost
; in transit (gaps in the original line numbering) and are reconstructed here
; from the surviving "br label %bb1" CHECK line — confirm against upstream.
define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: br label %bb1
; CHECK: bb1:
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
entry:
  %load = load <2 x i8>, ptr %addr, align 4
  br label %bb1

bb1:
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that too small of a vscale prevents optimization: at vscale_range(1,1)
; the predicate register is only 2 bytes, so a <4 x i8> load does not match.
define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that too large of a vscale prevents optimization: at vscale_range(4,4)
; the predicate register is 8 bytes, so a <4 x i8> load does not match.
define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that a non-zero insertion index prevents optimization: the fold only
; applies when the fixed vector lands at element 0 of the scalable vector.
define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that differing vscale min/max prevents optimization: with
; vscale_range(2,4) the predicate register size is not a compile-time constant.
define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that insertion into a non-undef vector prevents optimization: the
; passthru lanes would survive the insert, so a plain load cannot replace it.
define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)
; Attribute groups fix the vscale range per test: fixed sizes (#0-#2) permit
; the fold; the non-constant range (#3) must block it.
attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }