1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
3 ; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
5 define amdgpu_kernel void @infinite_loop(ptr addrspace(1) %out) {
; NOTE(review): this file looks like a partially-elided copy of an autogenerated
; LLVM test (stray leading line numbers; several labels/terminators missing from
; this view). The check lines are maintained by utils/update_test_checks.py --
; regenerate them rather than hand-editing.
; Purpose: kernel with a single unconditional infinite loop storing 999. The
; amdgpu-unify-divergent-exit-nodes pass gives the CFG a single exit by turning
; the backedge into `br i1 true, %loop, %DummyReturnBlock` (per the checks with
; the "IR" prefix below); the SI-prefixed checks pin the resulting SALU loop
; (`s_cbranch_vccnz` with vcc forced to -1).
6 ; SI-LABEL: infinite_loop:
7 ; SI: ; %bb.0: ; %entry
8 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
9 ; SI-NEXT: s_mov_b32 s3, 0xf000
10 ; SI-NEXT: s_mov_b32 s2, -1
11 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
12 ; SI-NEXT: s_and_b64 vcc, exec, -1
13 ; SI-NEXT: .LBB0_1: ; %loop
14 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
15 ; SI-NEXT: s_waitcnt lgkmcnt(0)
16 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
17 ; SI-NEXT: s_waitcnt vmcnt(0)
18 ; SI-NEXT: s_mov_b64 vcc, vcc
19 ; SI-NEXT: s_cbranch_vccnz .LBB0_1
20 ; SI-NEXT: ; %bb.2: ; %DummyReturnBlock
22 ; IR-LABEL: @infinite_loop(
24 ; IR-NEXT: br label [[LOOP:%.*]]
26 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
27 ; IR-NEXT: br i1 true, label [[LOOP]], label [[DUMMYRETURNBLOCK:%.*]]
28 ; IR: DummyReturnBlock:
; volatile store keeps the loop body from being optimized away (999 = 0x3e7).
35 store volatile i32 999, ptr addrspace(1) %out, align 4
39 define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {
; Purpose: infinite loop that is only entered by lanes whose workitem id
; equals 1; other lanes return immediately. The divergent-exit pass merges
; the loop exit and the plain return into a single UnifiedReturnBlock (per
; the checks below), so codegen can structurize with s_and_saveexec_b64 /
; s_cbranch_execz around the loop.
; NOTE(review): body lines appear elided in this view (labels/terminators
; missing); checks are autogenerated -- do not hand-edit.
40 ; SI-LABEL: infinite_loop_ret:
41 ; SI: ; %bb.0: ; %entry
42 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
43 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
44 ; SI-NEXT: s_cbranch_execz .LBB1_3
45 ; SI-NEXT: ; %bb.1: ; %loop.preheader
46 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
47 ; SI-NEXT: s_mov_b32 s3, 0xf000
48 ; SI-NEXT: s_mov_b32 s2, -1
49 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
50 ; SI-NEXT: s_and_b64 vcc, exec, -1
51 ; SI-NEXT: .LBB1_2: ; %loop
52 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
53 ; SI-NEXT: s_waitcnt lgkmcnt(0)
54 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
55 ; SI-NEXT: s_waitcnt vmcnt(0)
56 ; SI-NEXT: s_mov_b64 vcc, vcc
57 ; SI-NEXT: s_cbranch_vccnz .LBB1_2
58 ; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
60 ; IR-LABEL: @infinite_loop_ret(
62 ; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
63 ; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
64 ; IR-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
66 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
67 ; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]]
68 ; IR: UnifiedReturnBlock:
; divergent guard -- only lanes with workitem id == 1 enter %loop
72 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
73 %cond = icmp eq i32 %tmp, 1
74 br i1 %cond, label %loop, label %return
77 store volatile i32 999, ptr addrspace(1) %out, align 4
84 define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
; Purpose: two separate infinite loops reached through an undef branch; the
; pass reroutes both backedges (`br i1 true, ...` per the checks below) into
; one shared DummyReturnBlock so the kernel has a single exit. Codegen pins
; the structurized form with Flow/Flow2 blocks and two SALU loops (storing
; 888 = 0x378 and 999 = 0x3e7 respectively).
; NOTE(review): body lines appear elided in this view (entry/loop labels and
; terminators missing); checks are autogenerated -- do not hand-edit.
85 ; SI-LABEL: infinite_loops:
86 ; SI: ; %bb.0: ; %entry
87 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
88 ; SI-NEXT: s_mov_b64 s[2:3], -1
89 ; SI-NEXT: s_cbranch_scc1 .LBB2_4
91 ; SI-NEXT: s_mov_b32 s3, 0xf000
92 ; SI-NEXT: s_mov_b32 s2, -1
93 ; SI-NEXT: v_mov_b32_e32 v0, 0x378
94 ; SI-NEXT: s_and_b64 vcc, exec, -1
95 ; SI-NEXT: .LBB2_2: ; %loop2
96 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
97 ; SI-NEXT: s_waitcnt lgkmcnt(0)
98 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
99 ; SI-NEXT: s_waitcnt vmcnt(0)
100 ; SI-NEXT: s_mov_b64 vcc, vcc
101 ; SI-NEXT: s_cbranch_vccnz .LBB2_2
102 ; SI-NEXT: ; %bb.3: ; %Flow
103 ; SI-NEXT: s_mov_b64 s[2:3], 0
104 ; SI-NEXT: .LBB2_4: ; %Flow2
105 ; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
106 ; SI-NEXT: s_waitcnt lgkmcnt(0)
107 ; SI-NEXT: s_mov_b64 vcc, vcc
108 ; SI-NEXT: s_cbranch_vccz .LBB2_7
110 ; SI-NEXT: s_mov_b32 s3, 0xf000
111 ; SI-NEXT: s_mov_b32 s2, -1
112 ; SI-NEXT: s_waitcnt expcnt(0)
113 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
114 ; SI-NEXT: s_and_b64 vcc, exec, 0
115 ; SI-NEXT: .LBB2_6: ; %loop1
116 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
117 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
118 ; SI-NEXT: s_waitcnt vmcnt(0)
119 ; SI-NEXT: s_mov_b64 vcc, vcc
120 ; SI-NEXT: s_cbranch_vccz .LBB2_6
121 ; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
123 ; IR-LABEL: @infinite_loops(
125 ; IR-NEXT: br i1 undef, label [[LOOP1:%.*]], label [[LOOP2:%.*]]
127 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
128 ; IR-NEXT: br i1 true, label [[LOOP1]], label [[DUMMYRETURNBLOCK:%.*]]
130 ; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4
131 ; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]]
132 ; IR: DummyReturnBlock:
; undef selector deliberately leaves the taken loop unknown to the optimizer
136 br i1 undef, label %loop1, label %loop2
139 store volatile i32 999, ptr addrspace(1) %out, align 4
143 store volatile i32 888, ptr addrspace(1) %out, align 4
147 define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; Purpose: nested infinite loops guarded by a divergent condition
; (workitem id != 1 enters; id == 3 controls the inner backedge). The pass
; inserts a TransitionBlock (inner backedge becomes `br i1 true, ...` into it,
; per the checks below) plus a UnifiedReturnBlock, so the divergent exits
; collapse to one; codegen pins the two-deep structurized loop nest with
; exec-mask save/restore (s_and_saveexec_b64 / s_andn2_b64 / s_or_b64).
; NOTE(review): body lines appear elided in this view (outer_loop label,
; terminators, closing brace missing); checks are autogenerated -- do not
; hand-edit.
148 ; SI-LABEL: infinite_loop_nest_ret:
149 ; SI: ; %bb.0: ; %entry
150 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
151 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
152 ; SI-NEXT: s_cbranch_execz .LBB3_5
153 ; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
154 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
155 ; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
156 ; SI-NEXT: s_mov_b32 s7, 0xf000
157 ; SI-NEXT: s_mov_b32 s6, -1
158 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
159 ; SI-NEXT: .LBB3_2: ; %outer_loop
160 ; SI-NEXT: ; =>This Loop Header: Depth=1
161 ; SI-NEXT: ; Child Loop BB3_3 Depth 2
162 ; SI-NEXT: s_mov_b64 s[2:3], 0
163 ; SI-NEXT: .LBB3_3: ; %inner_loop
164 ; SI-NEXT: ; Parent Loop BB3_2 Depth=1
165 ; SI-NEXT: ; => This Inner Loop Header: Depth=2
166 ; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
167 ; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
168 ; SI-NEXT: s_waitcnt lgkmcnt(0)
169 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
170 ; SI-NEXT: s_waitcnt vmcnt(0)
171 ; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
172 ; SI-NEXT: s_cbranch_execnz .LBB3_3
173 ; SI-NEXT: ; %bb.4: ; %loop.exit.guard
174 ; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
175 ; SI-NEXT: s_or_b64 exec, exec, s[2:3]
176 ; SI-NEXT: s_mov_b64 vcc, 0
177 ; SI-NEXT: s_branch .LBB3_2
178 ; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
180 ; IR-LABEL: @infinite_loop_nest_ret(
182 ; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
183 ; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
184 ; IR-NEXT: br i1 [[COND1]], label [[OUTER_LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
186 ; IR-NEXT: br label [[INNER_LOOP:%.*]]
188 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
189 ; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
190 ; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK]]
191 ; IR: TransitionBlock:
192 ; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]]
193 ; IR: UnifiedReturnBlock:
; divergent guard -- lanes with workitem id == 1 skip the loop nest entirely
197 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
198 %cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination
199 br i1 %cond1, label %outer_loop, label %return
202 ; %cond2 = icmp eq i32 %tmp, 2
203 ; br i1 %cond2, label %outer_loop, label %inner_loop
206 inner_loop: ; preds = %LeafBlock, %LeafBlock1
207 store volatile i32 999, ptr addrspace(1) %out, align 4
208 %cond3 = icmp eq i32 %tmp, 3
209 br i1 %cond3, label %inner_loop, label %outer_loop
215 declare i32 @llvm.amdgcn.workitem.id.x()