llvm/test/Transforms/SLPVectorizer/X86/reused-mask-with-poison-index.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
   2 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
   3
   4 define fastcc i32 @test(ptr %0, <2 x float> %1, i1 %2, float %3, float %4) {
     ; Input function for the slp-vectorizer invocation at the top of this file.
     ; Block 8 holds twenty scalar float phis (%9-%28); their second incoming
     ; values (%59-%78) are the phis of the merge block 58. The function always
     ; returns 0 and its only store writes fmuladd(%78, %77, 0.0) to %0.
     ; The assertions below are autogenerated; regenerate them with
     ; utils/update_test_checks.py rather than editing them by hand.
   5 ; CHECK-LABEL: define fastcc i32 @test(
   6 ; CHECK-SAME: ptr [[TMP0:%.*]], <2 x float> [[TMP1:%.*]], i1 [[TMP2:%.*]], float [[TMP3:%.*]], float [[TMP4:%.*]]) {
   7 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
   8 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
   9 ; CHECK-NEXT:    br label %[[BB8:.*]]
  10 ; CHECK:       [[BB8]]:
  11 ; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, [[TMP5:%.*]] ], [ [[TMP58:%.*]], %[[TMP56:.*]] ]
  12 ; CHECK-NEXT:    [[TMP10:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP59:%.*]], %[[TMP56]] ]
  13 ; CHECK-NEXT:    [[TMP11:%.*]] = phi float [ [[TMP4]], [[TMP5]] ], [ [[TMP60:%.*]], %[[TMP56]] ]
  14 ; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP5]] ], [ [[TMP61:%.*]], %[[TMP56]] ]
  15 ; CHECK-NEXT:    [[TMP13:%.*]] = phi float [ [[TMP6]], [[TMP5]] ], [ [[TMP62:%.*]], %[[TMP56]] ]
  16 ; CHECK-NEXT:    [[TMP14:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP63:%.*]], %[[TMP56]] ]
  17 ; CHECK-NEXT:    [[TMP15:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP64:%.*]], %[[TMP56]] ]
  18 ; CHECK-NEXT:    [[TMP16:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP65:%.*]], %[[TMP56]] ]
  19 ; CHECK-NEXT:    [[TMP17:%.*]] = phi float [ undef, [[TMP5]] ], [ [[TMP66:%.*]], %[[TMP56]] ]
  20 ; CHECK-NEXT:    [[TMP18:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP67:%.*]], %[[TMP56]] ]
  21 ; CHECK-NEXT:    [[TMP19:%.*]] = phi float [ [[TMP4]], [[TMP5]] ], [ [[TMP68:%.*]], %[[TMP56]] ]
  22 ; CHECK-NEXT:    [[TMP20:%.*]] = phi float [ [[TMP4]], [[TMP5]] ], [ [[TMP69:%.*]], %[[TMP56]] ]
  23 ; CHECK-NEXT:    [[TMP21:%.*]] = phi float [ [[TMP4]], [[TMP5]] ], [ [[TMP70:%.*]], %[[TMP56]] ]
  24 ; CHECK-NEXT:    [[TMP22:%.*]] = phi float [ [[TMP4]], [[TMP5]] ], [ [[TMP71:%.*]], %[[TMP56]] ]
  25 ; CHECK-NEXT:    [[TMP23:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP73:%.*]], %[[TMP56]] ]
  26 ; CHECK-NEXT:    [[TMP24:%.*]] = phi float [ 0.000000e+00, [[TMP5]] ], [ [[TMP72:%.*]], %[[TMP56]] ]
  27 ; CHECK-NEXT:    [[TMP25:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP5]] ], [ poison, %[[TMP56]] ]
  28 ; CHECK-NEXT:    [[TMP26:%.*]] = phi <2 x float> [ zeroinitializer, [[TMP5]] ], [ poison, %[[TMP56]] ]
  29 ; CHECK-NEXT:    br i1 false, label %[[BB57:.*]], label %[[BB27:.*]]
  30 ; CHECK:       [[BB27]]:
  31 ; CHECK-NEXT:    [[TMP28:%.*]] = fcmp olt float [[TMP22]], 0.000000e+00
  32 ; CHECK-NEXT:    br i1 [[TMP28]], label %[[BB57]], label %[[BB29:.*]]
  33 ; CHECK:       [[BB29]]:
  34 ; CHECK-NEXT:    [[TMP30:%.*]] = fcmp olt float [[TMP21]], 0.000000e+00
  35 ; CHECK-NEXT:    br i1 [[TMP30]], label %[[BB57]], label %[[BB31:.*]]
  36 ; CHECK:       [[BB31]]:
  37 ; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
  38 ; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <4 x float> [[TMP32]], <4 x float> poison, <4 x i32> zeroinitializer
  39 ; CHECK-NEXT:    [[TMP34:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP33]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
  40 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x float> [[TMP25]], i32 0
  41 ; CHECK-NEXT:    [[TMP36:%.*]] = fsub float [[TMP17]], [[TMP35]]
  42 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x float> [[TMP25]], i32 1
  43 ; CHECK-NEXT:    [[TMP38:%.*]] = fsub float [[TMP15]], [[TMP37]]
  44 ; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP14]], i64 0
  45 ; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <4 x float> [[TMP25]], <4 x float> poison, <2 x i32> <i32 poison, i32 2>
  46 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <2 x float> [[TMP39]], <2 x float> [[TMP40]], <2 x i32> <i32 0, i32 3>
  47 ; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP38]], i64 0
  48 ; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP36]], i64 0
  49 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul <2 x float> [[TMP42]], [[TMP43]]
  50 ; CHECK-NEXT:    [[TMP45:%.*]] = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP41]], <2 x float> [[TMP26]], <2 x float> [[TMP44]])
  51 ; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <2 x float> [[TMP45]], i64 0
  52 ; CHECK-NEXT:    [[TMP47:%.*]] = fcmp ogt float [[TMP46]], 0.000000e+00
  53 ; CHECK-NEXT:    br i1 [[TMP47]], label %[[BB48:.*]], label %[[BB50:.*]]
  54 ; CHECK:       [[BB48]]:
  55 ; CHECK-NEXT:    br label %[[BB50]]
  56 ; CHECK:       [[BB49:.*]]:
  57 ; CHECK-NEXT:    br label %[[BB50]]
  58 ; CHECK:       [[BB50]]:
  59 ; CHECK-NEXT:    [[TMP51:%.*]] = phi float [ [[TMP12]], %[[BB49]] ], [ [[TMP19]], %[[BB48]] ], [ 0.000000e+00, %[[BB31]] ]
  60 ; CHECK-NEXT:    [[TMP52:%.*]] = phi float [ [[TMP13]], %[[BB49]] ], [ [[TMP20]], %[[BB48]] ], [ 0.000000e+00, %[[BB31]] ]
  61 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[BB57]], label %[[BB53:.*]]
  62 ; CHECK:       [[BB53]]:
  63 ; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
  64 ; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <4 x float> [[TMP25]], i32 3
  65 ; CHECK-NEXT:    br label %[[BB57]]
  66 ; CHECK:       [[TMP56]]:
  67 ; CHECK-NEXT:    br label %[[BB8]]
  68 ; CHECK:       [[BB57]]:
  69 ; CHECK-NEXT:    [[TMP58]] = phi float [ [[TMP9]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP3]], %[[BB50]] ]
  70 ; CHECK-NEXT:    [[TMP59]] = phi float [ [[TMP10]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ [[TMP55]], %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  71 ; CHECK-NEXT:    [[TMP60]] = phi float [ [[TMP11]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP3]], %[[BB50]] ]
  72 ; CHECK-NEXT:    [[TMP61]] = phi float [ [[TMP12]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  73 ; CHECK-NEXT:    [[TMP62]] = phi float [ [[TMP13]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ [[TMP54]], %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  74 ; CHECK-NEXT:    [[TMP63]] = phi float [ [[TMP14]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP9]], %[[BB50]] ]
  75 ; CHECK-NEXT:    [[TMP64]] = phi float [ [[TMP15]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ [[TMP55]], %[[BB53]] ], [ [[TMP10]], %[[BB50]] ]
  76 ; CHECK-NEXT:    [[TMP65]] = phi float [ [[TMP16]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP11]], %[[BB50]] ]
  77 ; CHECK-NEXT:    [[TMP66]] = phi float [ [[TMP17]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  78 ; CHECK-NEXT:    [[TMP67]] = phi float [ [[TMP18]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  79 ; CHECK-NEXT:    [[TMP68]] = phi float [ [[TMP19]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP3]], %[[BB50]] ]
  80 ; CHECK-NEXT:    [[TMP69]] = phi float [ [[TMP20]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ [[TMP54]], %[[BB53]] ], [ 0.000000e+00, %[[BB50]] ]
  81 ; CHECK-NEXT:    [[TMP70]] = phi float [ [[TMP21]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP51]], %[[BB50]] ]
  82 ; CHECK-NEXT:    [[TMP71]] = phi float [ [[TMP22]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ [[TMP54]], %[[BB53]] ], [ [[TMP52]], %[[BB50]] ]
  83 ; CHECK-NEXT:    [[TMP72]] = phi float [ [[TMP24]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP24]], %[[BB50]] ]
  84 ; CHECK-NEXT:    [[TMP73]] = phi float [ [[TMP23]], %[[BB29]] ], [ 0.000000e+00, %[[BB27]] ], [ 0.000000e+00, %[[BB8]] ], [ 0.000000e+00, %[[BB53]] ], [ [[TMP23]], %[[BB50]] ]
  85 ; CHECK-NEXT:    [[TMP74:%.*]] = phi <4 x float> [ [[TMP25]], %[[BB29]] ], [ [[TMP25]], %[[BB27]] ], [ zeroinitializer, %[[BB8]] ], [ [[TMP34]], %[[BB53]] ], [ [[TMP34]], %[[BB50]] ]
  86 ; CHECK-NEXT:    [[TMP75:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP73]], float [[TMP72]], float 0.000000e+00)
  87 ; CHECK-NEXT:    store float [[TMP75]], ptr [[TMP0]], align 4
  88 ; CHECK-NEXT:    ret i32 0
  89 ;
       ; Entry block (implicitly numbered %5): split the <2 x float> argument
       ; into its two lanes; they seed the phis %14 and %13 in block 8.
  90   %6 = extractelement <2 x float> %1, i64 0
  91   %7 = extractelement <2 x float> %1, i64 1
  92   br label %8
  93
       ; Every phi here has one incoming value from the entry block (%5) and
       ; one from block 57; the latter is always the matching phi of block 58.
  94 8:
  95   %9 = phi float [ 0.000000e+00, %5 ], [ %59, %57 ]
  96   %10 = phi float [ 0.000000e+00, %5 ], [ %60, %57 ]
  97   %11 = phi float [ 0.000000e+00, %5 ], [ %61, %57 ]
  98   %12 = phi float [ %4, %5 ], [ %62, %57 ]
  99   %13 = phi float [ %7, %5 ], [ %63, %57 ]
 100   %14 = phi float [ %6, %5 ], [ %64, %57 ]
 101   %15 = phi float [ 0.000000e+00, %5 ], [ %65, %57 ]
 102   %16 = phi float [ 0.000000e+00, %5 ], [ %66, %57 ]
 103   %17 = phi float [ 0.000000e+00, %5 ], [ %67, %57 ]
 104   %18 = phi float [ 0.000000e+00, %5 ], [ %68, %57 ]
 105   %19 = phi float [ undef, %5 ], [ %69, %57 ]
 106   %20 = phi float [ 0.000000e+00, %5 ], [ %70, %57 ]
 107   %21 = phi float [ 0.000000e+00, %5 ], [ %71, %57 ]
 108   %22 = phi float [ %4, %5 ], [ %72, %57 ]
 109   %23 = phi float [ %4, %5 ], [ %73, %57 ]
 110   %24 = phi float [ %4, %5 ], [ %74, %57 ]
 111   %25 = phi float [ %4, %5 ], [ %75, %57 ]
 112   %26 = phi float [ 0.000000e+00, %5 ], [ %76, %57 ]
 113   %27 = phi float [ 0.000000e+00, %5 ], [ %78, %57 ]
 114   %28 = phi float [ 0.000000e+00, %5 ], [ %77, %57 ]
       ; Constant-false condition: control always continues to block 29.
 115   br i1 false, label %58, label %29
 116
 117 29:
 118   %30 = fcmp olt float %25, 0.000000e+00
 119   br i1 %30, label %58, label %31
 120
 121 31:
 122   %32 = fcmp olt float %24, 0.000000e+00
 123   br i1 %32, label %58, label %33
 124
 125 33:
       ; Four identical scalar fmuladd calls on (%3, 0.0, 0.0). The expected
       ; output above replaces them with one <4 x float> fmuladd whose first
       ; operand is a splat of %3.
 126   %34 = tail call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00)
 127   %35 = tail call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00)
 128   %36 = tail call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00)
 129   %37 = tail call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00)
       ; %39 and %43 are two-lane vectors assembled from block-8 phis; both
       ; are operands of the <2 x float> fmuladd %47 below.
 130   %38 = insertelement <2 x float> zeroinitializer, float %17, i64 0
 131   %39 = insertelement <2 x float> %38, float %21, i64 1
 132   %40 = fsub float %19, %18
 133   %41 = fsub float %16, %20
 134   %42 = insertelement <2 x float> zeroinitializer, float %15, i64 0
 135   %43 = insertelement <2 x float> %42, float %26, i64 1
       ; %44 and %45 only set lane 0; lane 1 stays zero in both.
 136   %44 = insertelement <2 x float> zeroinitializer, float %41, i64 0
 137   %45 = insertelement <2 x float> zeroinitializer, float %40, i64 0
 138   %46 = fmul <2 x float> %44, %45
       ; NOTE(review): in the expected output the operand that stands in for
       ; %43 is built with a shufflevector whose mask is <i32 poison, i32 2>.
       ; Presumably that is the "poison index" case this test was written to
       ; cover - confirm against the commit that added the test.
 139   %47 = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %43, <2 x float> %39, <2 x float> %46)
       ; Only lane 0 of the result is consumed.
 140   %48 = extractelement <2 x float> %47, i64 0
 141   %49 = fcmp ogt float %48, 0.000000e+00
 142   br i1 %49, label %50, label %52
 143
 144 50:
 145   br label %52
 146
       ; No branch in this function targets block 51; it only appears as an
       ; incoming block of the phis in block 52.
 147 51:
 148   br label %52
 149
 150 52:
 151   %53 = phi float [ %13, %51 ], [ %22, %50 ], [ 0.000000e+00, %33 ]
 152   %54 = phi float [ %14, %51 ], [ %23, %50 ], [ 0.000000e+00, %33 ]
 153   br i1 %2, label %58, label %55
 154
 155 55:
       ; Same extraction as %6 in the entry block (lane 0 of %1).
 156   %56 = extractelement <2 x float> %1, i64 0
 157   br label %58
 158
       ; No branch in this function targets block 57; it only supplies the
       ; second incoming edge of the phis in block 8.
 159 57:
 160   br label %8
 161
       ; Merge point for blocks 31, 29, 8, 55 and 52. The phis %70, %68, %59
       ; and %76 pick up the four fmuladd results %34, %35, %36 and %37 on the
       ; edges from blocks 55 and 52.
 162 58:
 163   %59 = phi float [ %9, %31 ], [ %9, %29 ], [ 0.000000e+00, %8 ], [ %36, %55 ], [ %36, %52 ]
 164   %60 = phi float [ %10, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %3, %52 ]
 165   %61 = phi float [ %11, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ %9, %55 ], [ 0.000000e+00, %52 ]
 166   %62 = phi float [ %12, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %3, %52 ]
 167   %63 = phi float [ %13, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ 0.000000e+00, %52 ]
 168   %64 = phi float [ %14, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ %56, %55 ], [ 0.000000e+00, %52 ]
 169   %65 = phi float [ %15, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %10, %52 ]
 170   %66 = phi float [ %16, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ %9, %55 ], [ %11, %52 ]
 171   %67 = phi float [ %17, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %12, %52 ]
 172   %68 = phi float [ %18, %31 ], [ %18, %29 ], [ 0.000000e+00, %8 ], [ %35, %55 ], [ %35, %52 ]
 173   %69 = phi float [ %19, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ 0.000000e+00, %52 ]
 174   %70 = phi float [ %20, %31 ], [ %20, %29 ], [ 0.000000e+00, %8 ], [ %34, %55 ], [ %34, %52 ]
 175   %71 = phi float [ %21, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ 0.000000e+00, %52 ]
 176   %72 = phi float [ %22, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %3, %52 ]
 177   %73 = phi float [ %23, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ %56, %55 ], [ 0.000000e+00, %52 ]
 178   %74 = phi float [ %24, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %53, %52 ]
 179   %75 = phi float [ %25, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ %56, %55 ], [ %54, %52 ]
 180   %76 = phi float [ %26, %31 ], [ %26, %29 ], [ 0.000000e+00, %8 ], [ %37, %55 ], [ %37, %52 ]
 181   %77 = phi float [ %28, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %28, %52 ]
 182   %78 = phi float [ %27, %31 ], [ 0.000000e+00, %29 ], [ 0.000000e+00, %8 ], [ 0.000000e+00, %55 ], [ %27, %52 ]
       ; The only memory write in the function.
 183   %79 = tail call float @llvm.fmuladd.f32(float %78, float %77, float 0.000000e+00)
 184   store float %79, ptr %0, align 4
 185   ret i32 0
 186 }