llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s
   2
   3 target datalayout = "n32" ; the only layout component given is "n32", i.e. a native integer width of 32 bits
   4
   5 ; CHECK-LABEL: @switch_unreachable_default
   6 ; Kernel: switch on the workitem id with an unreachable default, then a byte-scanning loop over the pointer the switch selected.
   7 define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 { ; NOTE(review): #0 is { nounwind readnone } yet the body loads and stores - confirm this is intended
   8 centry:
   9   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; switch selector; also used to index %out in sw.exit
  10   switch i32 %tid, label %sw.default [
  11     i32 0, label %sw.bb0
  12     i32 1, label %sw.bb1
  13   ]
  14
  15 sw.bb0: ; %tid == 0: the phi in sw.epilog picks %in0
  16   br label %sw.epilog
  17
  18 sw.bb1: ; %tid == 1: the phi in sw.epilog picks %in1
  19   br label %sw.epilog
  20
  21 sw.default: ; every other %tid value; never branches to sw.epilog
  22   unreachable
  23
  24 sw.epilog:
  25   %ptr = phi ptr addrspace(1) [%in0, %sw.bb0], [%in1, %sw.bb1] ; no incoming value from sw.default, which ends in unreachable
  26   br label %sw.while
  27
  28 ; The loop below is necessary to preserve the effect of the
  29 ; unreachable default on divergence analysis in the presence of other
  30 ; optimizations. The loop consists of a single block where the loop
  31 ; exit is divergent because it depends on the divergent phi at the
  32 ; start of the block. The checks below ensure that the loop exit is
  33 ; handled correctly as divergent. But the data-flow within the block
  34 ; is sensitive to optimizations; so we just ensure that the relevant
  35 ; operations in the block body are indeed in the same block.
  36
  37 ; CHECK: [[PHI:%[a-zA-Z0-9._]+]]  = phi i64
  38 ; CHECK-NOT: {{ br }}
  39 ; CHECK: load i8
  40 ; CHECK-NOT: {{ br }}
  41 ; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
  42 ; CHECK: [[IF:%[a-zA-Z0-9._]+]]   = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
  43 ; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
  44 ; CHECK: br i1 [[LOOP]]
  45
  46 sw.while: ; single-block loop: reads bytes starting at %ptr until a zero byte is loaded
  47   %p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ] ; address of the byte read this iteration
  48   %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ] ; number of iterations completed before this one
  49   %char = load i8, ptr addrspace(1) %p, align 1
  50   %tobool = icmp eq i8 %char, 0 ; true when the loaded byte is zero; this is the loop exit condition
  51   %incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1
  52   %count.inc = add i32 %count, 1
  53   br i1 %tobool, label %sw.exit, label %sw.while
  54
  55 sw.exit:
  56   %tid64 = zext i32 %tid to i64
  57   %gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
  58   store i32 %count, ptr addrspace(1) %gep_out, align 4 ; out[tid] = count of non-zero bytes read before the zero byte
  59   ret void
  60 }
  61
  62 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; intrinsic called in centry to produce %tid
  63
  64 attributes #0 = { nounwind readnone } ; applied to both the kernel definition and the intrinsic declaration
  65 attributes #1 = { convergent noinline optnone } ; NOTE(review): not referenced anywhere in the file as shown - confirm whether it is still needed