; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
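;
; A minimal sketch (comments only, not checked by FileCheck) of the
; pattern si-annotate-control-flow is expected to emit for a divergent
; loop exit, per the OPT checks below; the value and label names here
; are illustrative:
;   %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %break.cond, i64 %phi.broken)
;   %exit = call i1 @llvm.amdgcn.loop.i64(i64 %broken)
;   br i1 %exit, label %exit.bb, label %header
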
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: ret void
;
; GCN-LABEL: break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB0_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc0 .LBB0_3
; GCN-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: s_branch .LBB0_4
; GCN-NEXT: .LBB0_3: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB0_4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB0_1
; GCN-NEXT: ; %bb.5: ; %bb9
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9: ; preds = %bb4, %bb1
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB1_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB1_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB1_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB1_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
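;
; The break condition below is fed by the constant expression
;   icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds)
; rather than a compare against null, since (per the FIXME above) the
; null compare would presumably constant-fold away before the annotator
; ever sees it.
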
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB2_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB2_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB3_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB3_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB3_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB4_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB4_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB4_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB4_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Swap the order of the branches in the flow block so that the true phi
; is the loop-continue condition.
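;
; With continue on the true edge, the annotator has to invert the phi
; before feeding it to the break intrinsic; the OPT checks below expect
; roughly:
;   %inv = xor i1 %my.tmp3, true
;   %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %inv, i64 %phi.broken)
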
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB5_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB5_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB5_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB5_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }