llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll

   1 ; RUN: llc -march=amdgcn < %s | FileCheck %s
   2
   3 ; Check that this bugpoint-reduced test compiles without hitting the
   4 ; infinite loop in TLI.SimplifyDemandedBits() that was caused by failing
   5 ; to use the return value of TLO.DAG.UpdateNodeOperands().
   6
   7 ; Check that code was generated; we know there will be
   8 ; a s_endpgm, so check for it.
   9
  10 @0 = external unnamed_addr addrspace(3) global [462 x float], align 4 ; unnamed external array of 462 floats; @foo both stores to it and loads from it
  11
  12 ; Function Attrs: nounwind readnone speculatable
  13 declare i32 @llvm.amdgcn.workitem.id.y() #0 ; called once, in the entry block of @foo
  14
  15 ; Function Attrs: nounwind readnone speculatable
  16 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; called once, in the entry block of @foo
  17
  18 ; Function Attrs: nounwind readnone speculatable
  19 declare float @llvm.fmuladd.f32(float, float, float) #0 ; called twice, in %bb31 of @foo
  20
  21 ; CHECK: s_endpgm
  22 define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 { ; bugpoint-reduced loop nest; the test only needs this kernel to finish compiling, so keep the IR shape as is
  23 bb:
  24   %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
  25   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
  26   %tmp5 = and i32 %tmp, 15 ; keep only the low 4 bits of workitem id y
  27   %tmp6 = mul nuw nsw i32 %tmp5, 21
  28   %tmp7 = sub i32 %tmp6, 0 ; this and the next two ops subtract/add 0, so %tmp9 has the same value as %tmp6
  29   %tmp8 = add i32 %tmp7, 0 ; NOTE(review): presumably left over from the reduction; the reproducer may rely on them
  30   %tmp9 = add i32 %tmp8, 0 ; only use is the entry value of the %tmp32 phi in %bb31
  31   br label %bb12
  32
  33 bb11:                                             ; preds = %bb30
  34   br i1 %c0, label %bb37, label %bb38 ; both successors end in the single ret void
  35
  36 bb12:                                             ; preds = %bb30, %bb
  37   br i1 false, label %.preheader, label %.loopexit145 ; condition is the constant false, so this edge into %.preheader is never taken
  38
  39 .loopexit145:                                     ; preds = %.preheader, %bb12
  40   br label %bb13
  41
  42 bb13:                                             ; preds = %.loopexit, %.loopexit145
  43   %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ] ; outer induction value: starts at %tmp5, advanced by 16 in %.loopexit
  44   %tmp15 = add nsw i32 %tmp14, -3 ; feeds only %tmp18
  45   %tmp16 = mul i32 %tmp14, 21 ; offset added to the inner index in %bb21
  46   br i1 %c1, label %bb17, label %.loopexit ; %c1 selects whether the inner store loop runs
  47
  48 bb17:                                             ; preds = %bb13
  49   %tmp18 = mul i32 %tmp15, 224 ; feeds only %tmp19
  50   %tmp19 = add i32 undef, %tmp18 ; %tmp19 has no users in this function
  51   br label %bb21
  52
  53 .loopexit:                                        ; preds = %bb21, %bb13
  54   %tmp20 = add nuw nsw i32 %tmp14, 16 ; step of the outer induction value
  55   br i1 %c2, label %bb13, label %bb26 ; %c2 true repeats the outer loop, false moves on to %bb31 via %bb26
  56
  57 bb21:                                             ; preds = %bb21, %bb17
  58   %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] ; inner induction value: starts at workitem id x, advanced by 8 below
  59   %tmp23 = add i32 %tmp22, %tmp16 ; element index = %tmp22 + %tmp14 * 21
  60   %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
  61   store float undef, ptr addrspace(3) %tmp24, align 4 ; the stored value is undef; only the address computation carries data
  62   %tmp25 = add nuw i32 %tmp22, 8 ; step of the inner induction value
  63   br i1 %c3, label %bb21, label %.loopexit ; %c3 true repeats the inner loop
  64
  65 bb26:                                             ; preds = %.loopexit
  66   br label %bb31
  67
  68 .preheader:                                       ; preds = %.preheader, %bb12
  69   %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ] ; entry value is undef; this block is only entered through the constant-false branch in %bb12
  70   %tmp28 = add nuw i32 %tmp27, 128
  71   %tmp29 = icmp ult i32 %tmp28, 1568
  72   br i1 %tmp29, label %.preheader, label %.loopexit145 ; loops while the counter is below 1568
  73
  74 bb30:                                             ; preds = %bb31
  75   br i1 %c4, label %bb11, label %bb12 ; %c4 true heads for the exit, false restarts from %bb12
  76
  77 bb31:                                             ; preds = %bb31, %bb26
  78   %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ] ; index is %tmp9 on entry and undef on every back edge
  79   %tmp33 = getelementptr inbounds [462 x float], ptr addrspace(3) @0, i32 0, i32 %tmp32
  80   %tmp34 = load float, ptr addrspace(3) %tmp33, align 4
  81   %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef) ; loaded value is the only defined operand
  82   %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35) ; %tmp36 has no users in this function
  83   br i1 %c5, label %bb30, label %bb31 ; %c5 false repeats this block
  84
  85 bb37:                                             ; preds = %bb11
  86   br label %bb38
  87
  88 bb38:                                             ; preds = %bb37, %bb11
  89   ret void
  90 }
  91
  92 attributes #0 = { nounwind readnone speculatable } ; attribute group referenced by the three intrinsic declarations
  93
  94 !0 = !{i32 8, i32 16, i32 1} ; operand of the !reqd_work_group_size attachment on @foo