llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll

   1 ; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
   2 ; PR11829 - regression test. The llc invocation above has no output check, so the test only requires llc to finish cleanly with the machine verifier enabled.
   3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
   4 target triple = "armv7-none-linux-gnueabi"
   5
   6 define arm_aapcs_vfpcc void @foo(ptr nocapture %arg) nounwind uwtable align 2 { ; %arg is never read and most operands below are undef. NOTE(review): looks like a machine-reduced reproducer - keep the instruction order intact.
   7 bb:
   8   br i1 undef, label %bb1, label %bb2 ; the branch condition is undef
   9
  10 bb1:                                              ; preds = %bb
  11   unreachable
  12
  13 bb2:                                              ; preds = %bb
  14   br label %bb3 ; enter the loop
  15
  16 bb3:                                              ; preds = %bb4, %bb2
  17   %tmp = icmp slt i32 undef, undef ; loop header: the loop condition is built from undef operands
  18   br i1 %tmp, label %bb4, label %bb67 ; %bb4 is the loop body, %bb67 the exit
  19
  20 bb4:                                              ; preds = %bb3
  21   %tmp5 = load <4 x i32>, ptr undef, align 16 ; vector load from an undef address
  22   %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> ; keep the low 23 bits of each lane (8388607 = 0x7FFFFF)
  23   %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> ; OR in 1065353216 = 0x3F800000, the bit pattern of float 1.0
  24   %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
  25   %constexpr = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128 ; %constexpr..%bc: split the bits of the all-1.0 vector into two 64-bit halves and OR them back together
  26   %constexpr1 = lshr i128 %constexpr, 64
  27   %constexpr2 = trunc i128 %constexpr1 to i64
  28   %constexpr3 = zext i64 %constexpr2 to i128
  29   %constexpr4 = shl i128 %constexpr3, 64
  30   %constexpr5 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  31   %constexpr6 = trunc i128 %constexpr5 to i64
  32   %constexpr7 = zext i64 %constexpr6 to i128
  33   %or = or i128 %constexpr4, %constexpr7
  34   %bc = bitcast i128 %or to <4 x float> ; same bits as the original all-1.0 vector constant
  35   %tmp9 = fsub <4 x float> %tmp8, %bc
  36   %tmp10 = fmul <4 x float> undef, %tmp9
  37   %tmp11 = fadd <4 x float> undef, %tmp10
  38   %tmp12 = bitcast <4 x float> zeroinitializer to i128 ; %tmp12..%tmp15 depend only on constants: high 64 bits of an all-zero vector go into element 1 of a [2 x i64]
  39   %tmp13 = lshr i128 %tmp12, 64
  40   %tmp14 = trunc i128 %tmp13 to i64
  41   %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
  42   %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) #3 ; attribute group #3 is not defined in the visible part of this file
  43   %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) #3 ; two vrecps + fmul rounds are applied to the vrecpe result (%tmp17..%tmp20)
  44   %tmp18 = fmul <4 x float> %tmp17, %tmp16
  45   %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) #3
  46   %tmp20 = fmul <4 x float> %tmp19, %tmp18
  47   %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
  48   %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) #3
  49   call arm_aapcs_vfpcc void @bar(ptr null, ptr undef, ptr undef, [2 x i64] zeroinitializer) #3
  50   %tmp23 = bitcast <4 x float> %tmp22 to i128 ; %tmp23..%tmp26: build [2 x i64] { low 64 bits of %tmp22, 0 }
  51   %tmp24 = trunc i128 %tmp23 to i64
  52   %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
  53   %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
  54   %tmp27 = load float, ptr undef, align 4
  55   %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
  56   %tmp29 = load <4 x i32>, ptr undef, align 16
  57   %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> ; same 0x7FFFFF mask / 0x3F800000 OR pair as %tmp6 and %tmp7
  58   %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
  59   %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
  60   %constexpr8 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128 ; %constexpr8..%bc2: second copy of the split-and-recombine sequence for the all-1.0 vector
  61   %constexpr9 = lshr i128 %constexpr8, 64
  62   %constexpr10 = trunc i128 %constexpr9 to i64
  63   %constexpr11 = zext i64 %constexpr10 to i128
  64   %constexpr12 = shl i128 %constexpr11, 64
  65   %constexpr13 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  66   %constexpr14 = trunc i128 %constexpr13 to i64
  67   %constexpr15 = zext i64 %constexpr14 to i128
  68   %or2 = or i128 %constexpr12, %constexpr15
  69   %bc2 = bitcast i128 %or2 to <4 x float>
  70   %tmp33 = fsub <4 x float> %tmp32, %bc2
  71   %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) #3
  72   %tmp35 = fmul <4 x float> %tmp34, undef
  73   %tmp36 = fmul <4 x float> %tmp35, undef
  74   %tmp37 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3 ; indirect call through an undef callee; the result %tmp37 is not used
  75   %tmp38 = load float, ptr undef, align 4
  76   %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
  77   %tmp40 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3 ; result unused, like %tmp37
  78   %tmp41 = load float, ptr undef, align 4
  79   %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
  80   %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer ; all-zero mask: lane 0 of %tmp39 is copied to all four lanes
  81   %tmp44 = fmul <4 x float> %tmp33, %tmp43
  82   %tmp45 = fadd <4 x float> %tmp42, %tmp44
  83   %tmp46 = fsub <4 x float> %tmp45, undef
  84   %tmp47 = fmul <4 x float> %tmp46, %tmp36
  85   %tmp48 = fadd <4 x float> undef, %tmp47
  86   %tmp49 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3 ; result unused, like %tmp37
  87   %tmp50 = load float, ptr undef, align 4
  88   %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
  89   %tmp52 = call arm_aapcs_vfpcc ptr null(ptr undef) #3 ; indirect call through a null callee
  90   %tmp54 = load float, ptr %tmp52, align 4 ; loads through the pointer returned by the call above
  91   %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
  92   %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
  93   %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) #3
  94   %tmp58 = fmul <4 x float> undef, %tmp57
  95   %tmp59 = fsub <4 x float> %tmp51, %tmp48
  96   %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
  97   %tmp61 = fmul <4 x float> %tmp59, %tmp60
  98   %tmp62 = fadd <4 x float> %tmp48, %tmp61
  99   call arm_aapcs_vfpcc void @baz(ptr undef, ptr undef, [2 x i64] %tmp26, ptr undef)
 100   %tmp63 = bitcast <4 x float> %tmp62 to i128 ; %tmp63..%tmp66: high 64 bits of %tmp62 go into element 1 of a zeroed [2 x i64]
 101   %tmp64 = lshr i128 %tmp63, 64
 102   %tmp65 = trunc i128 %tmp64 to i64
 103   %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
 104   call arm_aapcs_vfpcc void @quux(ptr undef, ptr undef, [2 x i64] undef, ptr undef, [2 x i64] %tmp66, ptr undef, ptr undef, [2 x i64] %tmp26, [2 x i64] %tmp15, ptr undef)
 105   br label %bb3 ; back edge to the loop header
 106
 107 bb67:                                             ; preds = %bb3
 108   ret void
 109 }
 110
 111 declare arm_aapcs_vfpcc void @bar(ptr, ptr, ptr, [2 x i64]) ; external function, called from the loop body (%bb4) of @foo
 112
 113 declare arm_aapcs_vfpcc void @baz(ptr, ptr nocapture, [2 x i64], ptr nocapture) nounwind uwtable inlinehint align 2 ; external function, called from %bb4 of @foo
 114
 115 declare arm_aapcs_vfpcc void @quux(ptr, ptr, [2 x i64], ptr nocapture, [2 x i64], ptr nocapture, ptr nocapture, [2 x i64], [2 x i64], ptr nocapture) nounwind uwtable inlinehint align 2 ; external function, the last call in %bb4 of @foo
 116
 117 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone ; NEON intrinsic, used twice in @foo
 118
 119 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone ; NEON intrinsic, used three times in @foo
 120
 121 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone ; NEON intrinsic, used once in @foo