test/CodeGen/AMDGPU/sgprcopies.ll

   1 ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefix=GCN %s
   2
   3 ; GCN-LABEL: {{^}}checkTwoBlocksWithUniformBranch
   4 ; GCN: BB0_2
   5 ; GCN: v_add
     ;
     ; Kernel under test.
     ;
     ; Each work-item derives a start point (%x, %y) from its x id: the id is
     ; split into a column (id urem %width) and a row (id udiv %width), which are
     ; scaled by %xStep / %yStep and offset by %xPos / %yPos.
     ;
     ; The loop then applies the escape-time recurrence
     ;   x' = x*x - y*y + %x        y' = 2*x*y + %y
     ; while x'*x' + y'*y' <= 4.0 and the iteration count is below %maxIter.
     ; The final count (0 if the loop is never entered) is stored to %out at the
     ; element selected by the work-item id.
     ;
     ; The FileCheck directives above only require that, after the function's
     ; label, the output contains "BB0_2" and later "v_add".
   6 define amdgpu_kernel void @checkTwoBlocksWithUniformBranch(i32 addrspace(1)* nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) {
   7 entry:
   8   %conv = call i32 @llvm.amdgcn.workitem.id.x() #1
   9   %rem = urem i32 %conv, %width
  10   %div = udiv i32 %conv, %width
  11   %conv1 = sitofp i32 %rem to float
  12   %x = tail call float @llvm.fmuladd.f32(float %xStep, float %conv1, float %xPos)
  13   %conv2 = sitofp i32 %div to float
  14   %y = tail call float @llvm.fmuladd.f32(float %yStep, float %conv2, float %yPos)
  15   %yy = fmul float %y, %y
  16   %xy = tail call float @llvm.fmuladd.f32(float %x, float %x, float %yy)
       ; Enter the loop only if x*x + y*y <= 4.0 and %maxIter is non-zero.
  17   %cmp01 = fcmp ole float %xy, 4.000000e+00
  18   %cmp02 = icmp ne i32 %maxIter, 0
  19   %cond01 = and i1 %cmp02, %cmp01
  20   br i1 %cond01, label %for.body.preheader, label %for.end
  21
  22 for.body.preheader:                               ; preds = %entry
  23   br label %for.body
  24
  25 for.body:                                         ; preds = %for.body.preheader, %for.body
  26   %x_val = phi float [ %call8, %for.body ], [ %x, %for.body.preheader ]
  27   %iter_val = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  28   %y_val = phi float [ %call9, %for.body ], [ %y, %for.body.preheader ]
       ; %sub = -%y_val, so %call8 = x_val*x_val - y_val*y_val + %x.
  29   %sub = fsub float -0.000000e+00, %y_val
  30   %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) #1
  31   %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) #1
       ; %call9 = 2*x_val*y_val + %y.
  32   %mul = fmul float %x_val, 2.000000e+00
  33   %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) #1
  34   %inc = add nuw i32 %iter_val, 1
       ; Continue while the new point satisfies x*x + y*y <= 4.0 and the
       ; incremented counter is still below %maxIter (unsigned compare).
  35   %mul3 = fmul float %call9, %call9
  36   %0 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3)
  37   %cmp = fcmp ole float %0, 4.000000e+00
  38   %cmp5 = icmp ult i32 %inc, %maxIter
  39   %or.cond = and i1 %cmp5, %cmp
  40   br i1 %or.cond, label %for.body, label %for.end.loopexit
  41
  42 for.end.loopexit:                                 ; preds = %for.body
  43   br label %for.end
  44
  45 for.end:                                          ; preds = %for.end.loopexit, %entry
  46   %iter.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %for.end.loopexit ]
  47   ; Index %out by the work-item id itself; shifting an i32 by 32 (its full bit width) would yield poison.
  48   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %conv
  49   store i32 %iter.0.lcssa, i32 addrspace(1)* %arrayidx, align 4
  50   ret void
  51 }
  52
  53 ; Function Attrs: nounwind readnone
     ; Intrinsic called once in the kernel's entry block to obtain the work-item x id.
  54 declare i32 @llvm.amdgcn.workitem.id.x() #0
     ; Multiply-add intrinsic, (a * b) + c, used for all multiply-add steps in the kernel.
  55 declare float @llvm.fmuladd.f32(float, float, float) #1
  56
     ; Attribute groups: #0 is attached to the work-item id declaration; #1 is
     ; attached to the fmuladd declaration and is also referenced at call sites.
  57 attributes #0 = { nounwind readnone }
  58 attributes #1 = { readnone }