test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll

   1 ; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
   2
   3 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"  ; leading "e" selects little-endian layout
   4 target triple = "nvptx64-nvidia-cuda"  ; module is built for the 64-bit NVPTX (CUDA) target
   5
   6 ; This test contains an unstructured (irreducible) loop: the cycle has two entry blocks.
   7 ;           +-------------- entry ----------------+
   8 ;           |                                     |
   9 ;           V                                     V
  10 ; i1 = phi(0, i3)                            i2 = phi(0, i3)
  11 ;     j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
  12 ;           ^                 |                   ^
  13 ;           |                 V                   |
  14 ;           +-------- switch (tid / i3) ----------+
  15 ;                             |
  16 ;                             V
  17 ;                        if (i3 == 5) // divergent,
  18 ; because its condition is sync dependent on the divergent switch on (tid / i3).
  19 define i32 @unstructured_loop(i1 %entry_cond) {  ; cycle with two entry blocks; the test only inspects the final conditional branch
  20 ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
  21 entry:
  22   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()  ; thread id from the NVVM tid.x intrinsic; feeds the switch value below
  23   br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2  ; the argument selects which of the two entry blocks is taken first
  24 loop_entry_1:
  25   %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]  ; 0 when coming from entry, %i3 when re-entered from the latch
  26   %j1 = add i32 %i1, 1  ; this entry steps the counter by 1
  27   br label %loop_body
  28 loop_entry_2:
  29   %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]  ; same shape as %i1, for the second entry block
  30   %j2 = add i32 %i2, 2  ; this entry steps the counter by 2
  31   br label %loop_body
  32 loop_body:
  33   %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]  ; merges the counters produced by the two entry blocks
  34   br label %loop_latch
  35 loop_latch:
  36   %div = sdiv i32 %tid, %i3  ; switch value is derived from %tid
  37   switch i32 %div, label %branch [ i32 1, label %loop_entry_1  ; 1 -> back edge to loop_entry_1
  38                                    i32 2, label %loop_entry_2 ]  ; 2 -> back edge to loop_entry_2; any other value leaves the cycle
  39 branch:
  40   %cmp = icmp eq i32 %i3, 5  ; uses %i3 after the cycle has been left
  41   br i1 %cmp, label %then, label %else  ; the directive on the next line expects this branch to be reported as DIVERGENT
  42 ; CHECK: DIVERGENT: br i1 %cmp,
  43 then:
  44   ret i32 0
  45 else:
  46   ret i32 1
  47 }
  48
  49 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()  ; called in @unstructured_loop to obtain %tid
  50 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()  ; declared but not referenced in the visible part of this file
  51 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()  ; declared but not referenced in the visible part of this file
  52 declare i32 @llvm.nvvm.read.ptx.sreg.laneid()  ; declared but not referenced in the visible part of this file
  53
  54 !nvvm.annotations = !{!0}  ; named metadata list holding the single annotation node !0
  55 !0 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}  ; tags @unstructured_loop with the "kernel" annotation, value 1