test/CodeGen/Thumb2/thumb2-spill-q.ll

   1 ; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
   2 ; PR4789
   3
   4 %bar = type { float, float, float } ; aggregate of three floats
   5 %baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } ; referenced only through the %baz* field of %quux
   6 %foo = type { <4 x float> } ; wrapper around a single <4 x float>
   7 %quux = type { i32 (...)**, %baz*, i32 } ; first member of %quuz
   8 %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } ; pointee type of @aaa's %this parameter
   9
  10 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly ; every call in @aaa passes i32 1 as the second operand
  11
  12 define void @aaa(%quuz* %this, i8* %block) { ; body of the PR4789 regression test; neither %this nor %block is referenced below
  13 ; CHECK: aaa:
  14 ; CHECK: bic r4, r4, #15
  15 ; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
  16 ; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
  17 entry: ; 13 vld1 calls plus 1 plain load, each result consumed only in %bb4. NOTE(review): presumably this is what forces the spill/reload matched by the vst1.64/vld1.64 patterns above -- confirm
  18   %aligned_vec = alloca <4 x float>, align 16
  19   %"alloca point" = bitcast i32 0 to i32 ; not referenced again in this function
  20   %vecptr = bitcast <4 x float>* %aligned_vec to i8*
  21   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; the only vld1 with a real address (the align-16 alloca); all others read from undef
  22   store float 6.300000e+01, float* undef, align 4
  23   %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
  24   store float 0.000000e+00, float* undef, align 4
  25   %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
  26   %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  27   store float 0.000000e+00, float* undef, align 4
  28   %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  29   store float 0.000000e+00, float* undef, align 4
  30   %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  31   store float 0.000000e+00, float* undef, align 4
  32   %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  33   store float 0.000000e+00, float* undef, align 4
  34   %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  35   store float 0.000000e+00, float* undef, align 4
  36   %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  37   store float 0.000000e+00, float* undef, align 4
  38   %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  39   store float 0.000000e+00, float* undef, align 4
  40   %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  41   store float 0.000000e+00, float* undef, align 4
  42   %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  43   store float 0.000000e+00, float* undef, align 4
  44   %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
  45   store float 0.000000e+00, float* undef, align 4
  46   %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
  47   br label %bb4
  48
  49 bb4:                                              ; preds = %bb193, %entry
  50   %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
  51   %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
  52   %3 = fmul <4 x float> zeroinitializer, %0       ; <<4 x float>> [#uses=2]
  53   %4 = fadd <4 x float> %3, %part0.0.0261         ; <<4 x float>> [#uses=1]
  54   %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
  55   %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
  56   %7 = fmul <4 x float> %1, undef                 ; <<4 x float>> [#uses=1]
  57   %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
  58   %9 = fptosi <4 x float> %8 to <4 x i32>         ; <<4 x i32>> [#uses=1]
  59   %10 = sitofp <4 x i32> %9 to <4 x float>        ; <<4 x float>> [#uses=1]
  60   %11 = fmul <4 x float> %10, %2                  ; <<4 x float>> [#uses=1]
  61   %12 = fmul <4 x float> undef, %6                ; <<4 x float>> [#uses=1]
  62   %13 = fmul <4 x float> %11, %4                  ; <<4 x float>> [#uses=1]
  63   %14 = fsub <4 x float> %12, %13                 ; <<4 x float>> [#uses=1]
  64   %15 = fsub <4 x float> %14, undef               ; <<4 x float>> [#uses=1]
  65   %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
  66   %17 = fadd <4 x float> %16, undef               ; <<4 x float>> [#uses=1]
  67   %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
  68   %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
  69   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
  70   %tmp1 = fadd <4 x float> %20, %ld3 ; %tmp1..%tmp9 and %21 fold %ld3..%ld12 into one running sum, so all ten loads are used inside the loop
  71   %tmp2 = fadd <4 x float> %tmp1, %ld4
  72   %tmp3 = fadd <4 x float> %tmp2, %ld5
  73   %tmp4 = fadd <4 x float> %tmp3, %ld6
  74   %tmp5 = fadd <4 x float> %tmp4, %ld7
  75   %tmp6 = fadd <4 x float> %tmp5, %ld8
  76   %tmp7 = fadd <4 x float> %tmp6, %ld9
  77   %tmp8 = fadd <4 x float> %tmp7, %ld10
  78   %tmp9 = fadd <4 x float> %tmp8, %ld11
  79   %21 = fadd <4 x float> %tmp9, %ld12
  80   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=1]
  81   %tmp = extractelement <4 x i1> %22, i32 0
  82   br i1 %tmp, label %bb193, label %bb186
  83
  84 bb186:                                            ; preds = %bb4
  85   br label %bb193
  86
  87 bb193:                                            ; preds = %bb186, %bb4
  88   %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
  89   %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
  90   br label %bb4 ; unconditional back edge; the function contains no ret
  91 }