llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
   3
   ; Exercises SLP re-vectorization (-slp-revec in the RUN line) on vector-typed
   ; phis. Both <2 x i32> phis in %sw.bb509.i are unused and the function always
   ; returns 0. The autogenerated CHECK lines expect a single <4 x i32> phi whose
   ; incoming values are a <4 x i32> masked gather of the two loaded pairs and a
   ; shuffle of that gather whose upper two lanes are replaced by zeroinitializer.
   4 define i32 @test() {
   5 ; CHECK-LABEL: @test(
   6 ; CHECK-NEXT:  entry:
   7 ; CHECK-NEXT:    br label [[IF_END_I87:%.*]]
   8 ; CHECK:       if.end.i87:
   9 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  10 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
  11 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  12 ; CHECK-NEXT:    switch i32 0, label [[SW_BB509_I:%.*]] [
  13 ; CHECK-NEXT:      i32 1, label [[SW_BB509_I]]
  14 ; CHECK-NEXT:      i32 0, label [[IF_THEN458_I:%.*]]
  15 ; CHECK-NEXT:    ]
  16 ; CHECK:       if.then458.i:
  17 ; CHECK-NEXT:    br label [[SW_BB509_I]]
  18 ; CHECK:       sw.bb509.i:
  19 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ]
  20 ; CHECK-NEXT:    ret i32 0
  21 ;
  22 entry:
       ; Two byte offsets from a null base; they are the addresses of the two
       ; <2 x i32> loads in %if.end.i87.
  23   %getelementptr0 = getelementptr i8, ptr null, i64 64036
  24   %getelementptr1 = getelementptr i8, ptr null, i64 64064
  25   br label %if.end.i87
  26
  27 if.end.i87:                                       ; preds = %entry
  28   %0 = load <2 x i32>, ptr %getelementptr0, align 4
  29   %1 = load <2 x i32>, ptr %getelementptr1, align 8
       ; Constant selector. The default edge and the case-1 edge both go to
       ; %sw.bb509.i, which is why the phis there list %if.end.i87 twice.
  30   switch i32 0, label %sw.bb509.i [
  31   i32 1, label %sw.bb509.i
  32   i32 0, label %if.then458.i
  33   ]
  34
  35 if.then458.i:                                     ; preds = %if.end.i87
  36   br label %sw.bb509.i
  37
  38 sw.bb509.i:                                       ; preds = %if.then458.i, %if.end.i87, %if.end.i87
       ; First phi receives %0 on every edge; second phi receives %1 only via
       ; %if.then458.i and zeroinitializer on the two direct edges.
  39   %4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ]
  40   %5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ]
  41   ret i32 0
  42 }
  43
  ; Four independent <8 x float> chains, each of the form
  ;   load -> fpext to double -> fadd with zero -> fptrunc -> fcmp ogt vs. zero.
  ; None of the results are used. The autogenerated CHECK lines expect the
  ; chains to be merged into single <32 x ...> operations, with the loads at
  ; offsets 132 and 164 combined into one <16 x float> load and the loads at
  ; offsets 200 and 300 kept as separate <8 x float> loads.
  44 define void @test2() {
  45 ; CHECK-LABEL: @test2(
  46 ; CHECK-NEXT:  entry:
  47 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr null, i64 132
  48 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr null, i64 200
  49 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr null, i64 300
  50 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
  51 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
  52 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
  53 ; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
  54 ; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
  55 ; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
  56 ; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
  57 ; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
  58 ; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
  59 ; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
  60 ; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
  61 ; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
  62 ; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
  63 ; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
  64 ; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
  65 ; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
  66 ; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
  67 ; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
  68 ; CHECK-NEXT:    ret void
  69 ;
  70 entry:
       ; Byte offsets from a null base. 164 = 132 + 32, i.e. the second address
       ; lies directly after the 8 x 4-byte floats loaded from the first; the
       ; offsets 200 and 300 are not adjacent to any other load.
  71   %0 = getelementptr i8, ptr null, i64 132
  72   %1 = getelementptr i8, ptr null, i64 164
  73   %2 = getelementptr i8, ptr null, i64 200
  74   %3 = getelementptr i8, ptr null, i64 300
  75   %4 = load <8 x float>, ptr %0, align 4
  76   %5 = load <8 x float>, ptr %1, align 4
  77   %6 = load <8 x float>, ptr %2, align 4
  78   %7 = load <8 x float>, ptr %3, align 4
       ; Widen each loaded vector to double.
  79   %8 = fpext <8 x float> %4 to <8 x double>
  80   %9 = fpext <8 x float> %5 to <8 x double>
  81   %10 = fpext <8 x float> %6 to <8 x double>
  82   %11 = fpext <8 x float> %7 to <8 x double>
       ; Add a zero vector to each widened value.
  83   %12 = fadd <8 x double> zeroinitializer, %8
  84   %13 = fadd <8 x double> zeroinitializer, %9
  85   %14 = fadd <8 x double> zeroinitializer, %10
  86   %15 = fadd <8 x double> zeroinitializer, %11
       ; Narrow the sums back to float.
  87   %16 = fptrunc <8 x double> %12 to <8 x float>
  88   %17 = fptrunc <8 x double> %13 to <8 x float>
  89   %18 = fptrunc <8 x double> %14 to <8 x float>
  90   %19 = fptrunc <8 x double> %15 to <8 x float>
       ; Compare zero against each narrowed value; the results have no users.
  91   %20 = fcmp ogt <8 x float> zeroinitializer, %16
  92   %21 = fcmp ogt <8 x float> zeroinitializer, %17
  93   %22 = fcmp ogt <8 x float> zeroinitializer, %18
  94   %23 = fcmp ogt <8 x float> zeroinitializer, %19
  95   ret void
  96 }
  97
  ; Two <2 x float> selects in %for.body feed two phis in %for.cond.cleanup.
  ; The phis and the float argument are unused. One select has a constant
  ; all-true condition, the other uses the fcmp result. The autogenerated CHECK
  ; lines expect one <4 x float> phi and one <4 x i1> select whose mask is
  ; assembled from the constant-true pair and the compare result.
  98 define void @test3(float %0) {
  99 ; CHECK-LABEL: @test3(
 100 ; CHECK-NEXT:  entry:
 101 ; CHECK-NEXT:    br label [[FOR_BODY_LR_PH:%.*]]
 102 ; CHECK:       for.body.lr.ph:
 103 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
 104 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
 105 ; CHECK-NEXT:    br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
 106 ; CHECK:       for.cond.cleanup:
 107 ; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
 108 ; CHECK-NEXT:    ret void
 109 ; CHECK:       for.body:
 110 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
 111 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
 112 ; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
 113 ; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
 114 ; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
 115 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 116 ; CHECK-NEXT:    [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
 117 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 118 ;
 119 entry:
 120   br label %for.body.lr.ph
 121
 122 for.body.lr.ph:
       ; Constant-false condition: the branch selects %for.body, but
       ; %for.cond.cleanup is still a successor, so its phis need an incoming
       ; value for this edge.
 123   br i1 false, label %for.cond.cleanup, label %for.body
 124
 125 for.cond.cleanup:                                 ; preds = %for.body, %for.body.lr.ph
       ; Each phi merges zeroinitializer (from %for.body.lr.ph) with one of the
       ; selects computed in %for.body.
 126   %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
 127   %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
 128   ret void
 129
 130 for.body:
 131   %3 = load <2 x float>, ptr null, align 4
 132   %4 = fcmp olt <2 x float> zeroinitializer, %3
       ; Same true/false operands in both selects; only the condition differs
       ; (constant all-true vs. the compare result %4).
 133   %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
 134   %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
 135   br label %for.cond.cleanup
 136 }