test/Transforms/SLPVectorizer/X86/value-bug.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
   3
   4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   5
   6 ; We used to crash on this example because we were building a constant
   7 ; expression during vectorization and the vectorizer expects instructions
   8 ; as elements of the vectorized tree.
   9 ; PR19621
  10
   11 define void @test() {  ; PR19621 regression input; the expected vectorized form is asserted in the directive lines below.
   12 ; CHECK-LABEL: @test(
   13 ; CHECK-NEXT:  bb279:
   14 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0
   15 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
   16 ; CHECK-NEXT:    br label [[BB283:%.*]]
   17 ; CHECK:       bb283:
   18 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ]
   19 ; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ]
   20 ; CHECK-NEXT:    br label [[BB284:%.*]]
   21 ; CHECK:       bb284:
   22 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
   23 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef
   24 ; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef
   25 ; CHECK-NEXT:    br label [[BB21_I:%.*]]
   26 ; CHECK:       bb21.i:
   27 ; CHECK-NEXT:    br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
   28 ; CHECK:       bb22.i:
   29 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]]
   30 ; CHECK-NEXT:    br label [[BB32_I:%.*]]
   31 ; CHECK:       bb32.i:
   32 ; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
   33 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
   34 ; CHECK:       exit:
   35 ; CHECK-NEXT:    [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
   36 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], <double undef, double 0.000000e+00>
   37 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
   38 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef
   39 ; CHECK-NEXT:    [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
   40 ; CHECK-NEXT:    br label [[BB283]]
   41 ;
   42 bb279:  ; first block of the function; it only branches to %bb283
   43   br label %bb283
   44
   45 bb283:  ; reached from %bb279 and, via the back edge, from %exit
   46   %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ]  ; carries the result of the second fptrunc chain in %exit
   47   %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ]  ; carries a value truncated from a constant undef
   48   %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ]  ; carries the result of the first fptrunc chain in %exit
   49   %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ]  ; carries a value truncated from a constant undef
   50   br label %bb284
   51
   52 bb284:  ; two scalar chains of identical shape: fpext, then two fsubs
   53   %tmp7.i = fpext float %Av.sroa.3.0 to double  ; start of chain 1
   54   %tmp8.i = fsub double %tmp7.i, undef
   55   %tmp9.i = fsub double %tmp8.i, undef
   56   %tmp17.i = fpext float %Av.sroa.8.0 to double  ; start of chain 2
   57   %tmp19.i = fsub double %tmp17.i, undef
   58   %tmp20.i = fsub double %tmp19.i, undef
   59   br label %bb21.i
   60
   61 bb21.i:  ; reached from %bb284 and from %bb32.i
   62   br i1 undef, label %bb22.i, label %exit  ; the condition is undef, so either successor may be taken
   63
   64 bb22.i:  ; consumes the results of both chains from %bb284
   65   %tmp24.i = fadd double undef, %tmp9.i
   66   %tmp26.i = fadd double undef, %tmp20.i
   67   br label %bb32.i
   68
   69 bb32.i:  ; self-looping block; both phis take 0.0 on the self edge
   70   %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
   71   %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
   72   br i1 undef, label %bb32.i, label %bb21.i
   73
   74 exit:  ; two chains of identical shape: fpext, fmul, fadd, fadd, fptrunc
   75   %tmp303 = fpext float %Av.sroa.0.0 to double  ; start of chain 1
   76   %tmp304 = fmul double %tmp303, undef  ; chain 1 multiplies by undef
   77   %tmp305 = fadd double undef, %tmp304
   78   %tmp306 = fadd double %tmp305, undef
   79   %tmp307 = fptrunc double %tmp306 to float  ; feeds %Av.sroa.3.0
   80   %tmp311 = fpext float %Av.sroa.5.0 to double  ; start of chain 2
   81   %tmp312 = fmul double %tmp311, 0.000000e+00  ; chain 2 multiplies by 0.0, so the two fmul constants differ
   82   %tmp313 = fadd double undef, %tmp312
   83   %tmp314 = fadd double %tmp313, undef
   84   %tmp315 = fptrunc double %tmp314 to float  ; feeds %Av.sroa.8.0
   85   %tmp317 = fptrunc double undef to float  ; instruction whose only operand is a constant; feeds %Av.sroa.0.0
   86   %tmp319 = fptrunc double undef to float  ; instruction whose only operand is a constant; feeds %Av.sroa.5.0
   87   br label %bb283  ; back edge to %bb283
   88 }
  89
   90 ; Make sure that we properly handle constant folded vectorized trees. The
   91 ; vectorizer starts at the pair (%t2, %t3) and will constant fold the tree.
  92 ; The code that handles insertelement instructions must handle this.
   93 define <4 x double> @constant_folding() {  ; every scalar operand below is a literal constant
   94 ; CHECK-LABEL: @constant_folding(
   95 ; CHECK-NEXT:  entry:
   96 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
   97 ; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
   98 ; CHECK-NEXT:    ret <4 x double> [[I2]]
   99 ;
  100 entry:
  101   %t0 = fadd double 1.000000e+00 , 0.000000e+00  ; 1.0 + 0.0
  102   %t1 = fadd double 1.000000e+00 , 1.000000e+00  ; 1.0 + 1.0
  103   %t2 = fmul double %t0, 1.000000e+00  ; expected output replaces this value with the literal 1.0
  104   %i1 = insertelement <4 x double> undef, double %t2, i32 1  ; %t2 goes into element 1
  105   %t3 = fmul double %t1, 1.000000e+00  ; expected output replaces this value with the literal 2.0
  106   %i2 = insertelement <4 x double> %i1, double %t3, i32 0  ; %t3 goes into element 0; elements 2 and 3 stay undef
  107   ret <4 x double> %i2
  108 }