llvm/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
   3
   4 ; This test is designed to check that the backedge from a prior block won't
   5 ; reset the variable introduced to record and accumulate the number of threads
   6 ; which have already exited the loop.
   7
   8 define amdgpu_kernel void @multiple_backedges(i32 %arg, i32* %arg1) { ; Input kernel: block %loop is re-entered by two backedges, one from %loop itself and one from %loop_end.
   9 ; OPT-LABEL: @multiple_backedges(
  10 ; OPT-NEXT:  entry:
  11 ; OPT-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
  12 ; OPT-NEXT:    [[TMP2:%.*]] = shl nsw i32 [[ARG:%.*]], 1
  13 ; OPT-NEXT:    br label [[LOOP:%.*]]
  14 ; OPT:       loop:
  15 ; OPT-NEXT:    [[PHI_BROKEN1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOOP_END:%.*]] ], [ [[PHI_BROKEN1]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
  16 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ 0, [[LOOP_END]] ], [ [[TMP0:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
  17 ; OPT-NEXT:    [[TMP4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP5:%.*]], [[LOOP]] ], [ 0, [[LOOP_END]] ]
  18 ; OPT-NEXT:    [[TMP5]] = add nsw i32 [[TMP4]], [[TMP]]
  19 ; OPT-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[ARG]], [[TMP5]]
  20 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN]])
  21 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
  22 ; OPT-NEXT:    br i1 [[TMP1]], label [[LOOP_END]], label [[LOOP]]
  23 ; OPT:       loop_end:
  24 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
  25 ; OPT-NEXT:    [[EXIT:%.*]] = icmp sgt i32 [[TMP5]], [[TMP2]]
  26 ; OPT-NEXT:    [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[EXIT]], i64 [[PHI_BROKEN1]])
  27 ; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP7]])
  28 ; OPT-NEXT:    br i1 [[TMP3]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
  29 ; OPT:       loop_exit:
  30 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
  31 ; OPT-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP]] to i64
  32 ; OPT-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 [[TMP12]]
  33 ; OPT-NEXT:    [[TMP14:%.*]] = addrspacecast i32* [[TMP13]] to i32 addrspace(1)*
  34 ; OPT-NEXT:    store i32 [[TMP5]], i32 addrspace(1)* [[TMP14]], align 4
  35 ; OPT-NEXT:    ret void
  36 ;
  37 entry: ; Computes the two loop-invariant values used below, then enters %loop.
  38   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() ; per-iteration increment (presumably the workitem id in x, going by the intrinsic name)
  39   %tmp2 = shl nsw i32 %arg, 1 ; %arg << 1: threshold compared against the accumulator in %loop_end
  40   br label %loop
  41
  42 loop: ; Three predecessors: %entry, %loop (self backedge) and %loop_end (second backedge).
  43   %tmp4 = phi i32 [ 0, %entry ], [ %tmp5, %loop ], [ 0, %loop_end ] ; accumulator: 0 when arriving from %entry or %loop_end, carried %tmp5 on the self backedge
  44   %tmp5 = add nsw i32 %tmp4, %tmp ; advance the accumulator by %tmp
  45   %tmp6 = icmp slt i32 %arg, %tmp5 ; true when %arg < %tmp5 (signed)
  46   br i1 %tmp6, label %loop_end, label %loop ; true: go on to %loop_end; false: take the self backedge
  47
  48 loop_end: ; Reached only from %loop; decides between leaving and the second backedge.
  49   %exit = icmp sgt i32 %tmp5, %tmp2 ; true when %tmp5 > %tmp2 (signed)
  50   br i1 %exit, label %loop_exit, label %loop ; true: leave via %loop_exit; false: second backedge to %loop
  51
  52 loop_exit: ; Stores the final %tmp5 to element %tmp of %arg1.
  53   %tmp12 = zext i32 %tmp to i64 ; widen %tmp for use as a 64-bit index
  54   %tmp13 = getelementptr inbounds i32, i32* %arg1, i64 %tmp12 ; address of %arg1[%tmp12]
  55   %tmp14 = addrspacecast i32* %tmp13 to i32 addrspace(1)* ; store is issued through an addrspace(1) pointer
  56   store i32 %tmp5, i32 addrspace(1)* %tmp14, align 4 ; write the value %tmp5 had on loop exit
  57   ret void
  58 }
  59
  60 ; Function Attrs: nounwind readnone speculatable (listed in this comment only; the declaration below carries no attribute group)
  61 declare i32 @llvm.amdgcn.workitem.id.x() ; intrinsic called once, in the entry block of @multiple_backedges