llvm/test/CodeGen/AMDGPU/loop_break.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
   3 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s
   4
   5 ; Uses llvm.amdgcn.if.break
   6
   7 define amdgpu_kernel void @break_loop(i32 %arg) #0 {
   8 ; OPT-LABEL: @break_loop(
   9 ; OPT-NEXT:  bb:
  10 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
  11 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
  12 ; OPT-NEXT:    br label [[BB1:%.*]]
  13 ; OPT:       bb1:
  14 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
  15 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
  16 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
  17 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  18 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  19 ; OPT:       bb4:
  20 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
  21 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  22 ; OPT-NEXT:    br label [[FLOW]]
  23 ; OPT:       Flow:
  24 ; OPT-NEXT:    [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  25 ; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
  26 ; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
  27 ; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
  28 ; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
  29 ; OPT:       bb9:
  30 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
  31 ; OPT-NEXT:    ret void
  32 ;
  33 ; GCN-LABEL: break_loop:
  34 ; GCN:       ; %bb.0: ; %bb
  35 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
  36 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
  37 ; GCN-NEXT:    s_mov_b32 s2, -1
  38 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
  39 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
  40 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
  41 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
  42 ; GCN-NEXT:    ; implicit-def: $sgpr6
  43 ; GCN-NEXT:  .LBB0_1: ; %bb1
  44 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
  45 ; GCN-NEXT:    s_add_i32 s6, s6, 1
  46 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
  47 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
  48 ; GCN-NEXT:    s_cbranch_scc0 .LBB0_3
  49 ; GCN-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
  50 ; GCN-NEXT:    ; implicit-def: $sgpr6
  51 ; GCN-NEXT:    s_branch .LBB0_4
  52 ; GCN-NEXT:  .LBB0_3: ; %bb4
  53 ; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
  54 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
  55 ; GCN-NEXT:    s_waitcnt vmcnt(0)
  56 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
  57 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
  58 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
  59 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
  60 ; GCN-NEXT:  .LBB0_4: ; %Flow
  61 ; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
  62 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
  63 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
  64 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
  65 ; GCN-NEXT:    s_cbranch_execnz .LBB0_1
  66 ; GCN-NEXT:  ; %bb.5: ; %bb9
  67 ; GCN-NEXT:    s_endpgm
     ; Input is a loop with two exits to %bb9: the header %bb1 exits when %cmp0
     ; is false, and %bb4 exits when %cmp1 is false. The OPT checks above expect
     ; both exits to be routed through a single Flow block whose i1 phi feeds
     ; llvm.amdgcn.if.break and llvm.amdgcn.loop.
  68 bb:
  69   %id = call i32 @llvm.amdgcn.workitem.id.x()
  70   %my.tmp = sub i32 %id, %arg
  71   br label %bb1
  72
     ; Loop header. %lsr.iv is undef on entry from %bb.
  73 bb1:
  74   %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  75   %lsr.iv.next = add i32 %lsr.iv, 1
  76   %cmp0 = icmp slt i32 %lsr.iv.next, 0
  77   br i1 %cmp0, label %bb4, label %bb9
  78
     ; The back edge to %bb1 is taken while %my.tmp < %load. The OPT checks
     ; expect this compare in inverted form (sge) as the break condition.
  79 bb4:
  80   %load = load volatile i32, ptr addrspace(1) undef, align 4
  81   %cmp1 = icmp slt i32 %my.tmp, %load
  82   br i1 %cmp1, label %bb1, label %bb9
  83
  84 bb9:
  85   ret void
  86 }
  87
  88 define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
  89 ; OPT-LABEL: @undef_phi_cond_break_loop(
  90 ; OPT-NEXT:  bb:
  91 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
  92 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
  93 ; OPT-NEXT:    br label [[BB1:%.*]]
  94 ; OPT:       bb1:
  95 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
  96 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
  97 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
  98 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  99 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 100 ; OPT:       bb4:
 101 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 102 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 103 ; OPT-NEXT:    br label [[FLOW]]
 104 ; OPT:       Flow:
 105 ; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
 106 ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
 107 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
 108 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 109 ; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
 110 ; OPT:       bb9:
 111 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 112 ; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
 113 ; OPT-NEXT:    ret void
 114 ;
 115 ; GCN-LABEL: undef_phi_cond_break_loop:
 116 ; GCN:       ; %bb.0: ; %bb
 117 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
 118 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 119 ; GCN-NEXT:    s_mov_b32 s2, -1
 120 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 121 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
 122 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 123 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 124 ; GCN-NEXT:    ; implicit-def: $sgpr6
 125 ; GCN-NEXT:  .LBB1_1: ; %bb1
 126 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 127 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 128 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
 129 ; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
 130 ; GCN-NEXT:  ; %bb.2: ; %bb4
 131 ; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
 132 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 133 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 134 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
 135 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 136 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
 137 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 138 ; GCN-NEXT:  .LBB1_3: ; %Flow
 139 ; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
 140 ; GCN-NEXT:    s_add_i32 s6, s6, 1
 141 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
 142 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
 143 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 144 ; GCN-NEXT:    s_cbranch_execnz .LBB1_1
 145 ; GCN-NEXT:  ; %bb.4: ; %bb9
 146 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 147 ; GCN-NEXT:    v_mov_b32_e32 v0, 7
 148 ; GCN-NEXT:    s_mov_b32 m0, -1
 149 ; GCN-NEXT:    ds_write_b32 v0, v0
 150 ; GCN-NEXT:    s_endpgm
     ; The input loop already has a single %Flow block: %bb1 and %bb4 both
     ; branch to it, and it either breaks to %bb9 or loops back to %bb1 on the
     ; i1 phi %my.tmp3. In this variant the value of %my.tmp3 incoming from %bb1
     ; is undef. The OPT checks above expect that phi to be passed unchanged to
     ; llvm.amdgcn.if.break.
 151 bb:
 152   %id = call i32 @llvm.amdgcn.workitem.id.x()
 153   %my.tmp = sub i32 %id, %arg
 154   br label %bb1
 155
 156 bb1:                                              ; preds = %Flow, %bb
 157   %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
 158   %lsr.iv.next = add i32 %lsr.iv, 1
 159   %cmp0 = icmp slt i32 %lsr.iv.next, 0
 160   br i1 %cmp0, label %bb4, label %Flow
 161
 162 bb4:                                              ; preds = %bb1
 163   %load = load volatile i32, ptr addrspace(1) undef, align 4
 164   %cmp1 = icmp sge i32 %my.tmp, %load
 165   br label %Flow
 166
     ; Break condition: %cmp1 when coming from %bb4, undef when coming from %bb1.
 167 Flow:                                             ; preds = %bb4, %bb1
 168   %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 169   %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
 170   br i1 %my.tmp3, label %bb9, label %bb1
 171
     ; Loop exit: volatile store of 7 through an undef addrspace(3) pointer.
 172 bb9:                                              ; preds = %Flow
 173   store volatile i32 7, ptr addrspace(3) undef
 174   ret void
 175 }
 176
 177 ; FIXME: ConstantExpr compare of address to null folds away
     ; NOTE(review): the compare that uses @lds below (%cmp2 in
     ; @constexpr_phi_cond_break_loop) is against inttoptr (i32 4), not null,
     ; and the GCN checks for that kernel contain no compare instruction for it.
 178 @lds = addrspace(3) global i32 undef
 179
 180 define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
 181 ; OPT-LABEL: @constexpr_phi_cond_break_loop(
 182 ; OPT-NEXT:  bb:
 183 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 184 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
 185 ; OPT-NEXT:    br label [[BB1:%.*]]
 186 ; OPT:       bb1:
 187 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
 188 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
 189 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
 190 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
 191 ; OPT-NEXT:    [[CMP2:%.*]] = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
 192 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 193 ; OPT:       bb4:
 194 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 195 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 196 ; OPT-NEXT:    br label [[FLOW]]
 197 ; OPT:       Flow:
 198 ; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
 199 ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ [[CMP2]], [[BB1]] ]
 200 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
 201 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 202 ; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
 203 ; OPT:       bb9:
 204 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 205 ; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
 206 ; OPT-NEXT:    ret void
 207 ;
 208 ; GCN-LABEL: constexpr_phi_cond_break_loop:
 209 ; GCN:       ; %bb.0: ; %bb
 210 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
 211 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 212 ; GCN-NEXT:    s_mov_b32 s2, -1
 213 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 214 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
 215 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 216 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 217 ; GCN-NEXT:    ; implicit-def: $sgpr6
 218 ; GCN-NEXT:  .LBB2_1: ; %bb1
 219 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 220 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 221 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
 222 ; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
 223 ; GCN-NEXT:  ; %bb.2: ; %bb4
 224 ; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
 225 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 226 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 227 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
 228 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 229 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
 230 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 231 ; GCN-NEXT:  .LBB2_3: ; %Flow
 232 ; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
 233 ; GCN-NEXT:    s_add_i32 s6, s6, 1
 234 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
 235 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
 236 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 237 ; GCN-NEXT:    s_cbranch_execnz .LBB2_1
 238 ; GCN-NEXT:  ; %bb.4: ; %bb9
 239 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 240 ; GCN-NEXT:    v_mov_b32_e32 v0, 7
 241 ; GCN-NEXT:    s_mov_b32 m0, -1
 242 ; GCN-NEXT:    ds_write_b32 v0, v0
 243 ; GCN-NEXT:    s_endpgm
     ; The input loop already has a single %Flow block that breaks to %bb9 or
     ; loops back to %bb1 on the i1 phi %my.tmp3. In this variant the value of
     ; %my.tmp3 incoming from %bb1 is %cmp2, a compare of the constant address
     ; inttoptr (i32 4) against @lds. The OPT checks above expect %cmp2 to stay
     ; in %bb1 and the phi to be passed unchanged to llvm.amdgcn.if.break.
 244 bb:
 245   %id = call i32 @llvm.amdgcn.workitem.id.x()
 246   %my.tmp = sub i32 %id, %arg
 247   br label %bb1
 248
 249 bb1:                                              ; preds = %Flow, %bb
 250   %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
 251   %lsr.iv.next = add i32 %lsr.iv, 1
 252   %cmp0 = icmp slt i32 %lsr.iv.next, 0
 253   %cmp2 = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
 254   br i1 %cmp0, label %bb4, label %Flow
 255
 256 bb4:                                              ; preds = %bb1
 257   %load = load volatile i32, ptr addrspace(1) undef, align 4
 258   %cmp1 = icmp sge i32 %my.tmp, %load
 259   br label %Flow
 260
     ; Break condition: %cmp1 when coming from %bb4, %cmp2 when coming from %bb1.
 261 Flow:                                             ; preds = %bb4, %bb1
 262   %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 263   %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ %cmp2, %bb1 ]
 264   br i1 %my.tmp3, label %bb9, label %bb1
 265
     ; Loop exit: volatile store of 7 through an undef addrspace(3) pointer.
 266 bb9:                                              ; preds = %Flow
 267   store volatile i32 7, ptr addrspace(3) undef
 268   ret void
 269 }
 270
 271 define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
 272 ; OPT-LABEL: @true_phi_cond_break_loop(
 273 ; OPT-NEXT:  bb:
 274 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 275 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
 276 ; OPT-NEXT:    br label [[BB1:%.*]]
 277 ; OPT:       bb1:
 278 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
 279 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
 280 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
 281 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
 282 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 283 ; OPT:       bb4:
 284 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 285 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 286 ; OPT-NEXT:    br label [[FLOW]]
 287 ; OPT:       Flow:
 288 ; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
 289 ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
 290 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
 291 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 292 ; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
 293 ; OPT:       bb9:
 294 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 295 ; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
 296 ; OPT-NEXT:    ret void
 297 ;
 298 ; GCN-LABEL: true_phi_cond_break_loop:
 299 ; GCN:       ; %bb.0: ; %bb
 300 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
 301 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 302 ; GCN-NEXT:    s_mov_b32 s2, -1
 303 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 304 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
 305 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 306 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 307 ; GCN-NEXT:    ; implicit-def: $sgpr6
 308 ; GCN-NEXT:  .LBB3_1: ; %bb1
 309 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 310 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 311 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
 312 ; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
 313 ; GCN-NEXT:  ; %bb.2: ; %bb4
 314 ; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
 315 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 316 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 317 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
 318 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 319 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
 320 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 321 ; GCN-NEXT:  .LBB3_3: ; %Flow
 322 ; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
 323 ; GCN-NEXT:    s_add_i32 s6, s6, 1
 324 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
 325 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
 326 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 327 ; GCN-NEXT:    s_cbranch_execnz .LBB3_1
 328 ; GCN-NEXT:  ; %bb.4: ; %bb9
 329 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 330 ; GCN-NEXT:    v_mov_b32_e32 v0, 7
 331 ; GCN-NEXT:    s_mov_b32 m0, -1
 332 ; GCN-NEXT:    ds_write_b32 v0, v0
 333 ; GCN-NEXT:    s_endpgm
     ; The input loop already has a single %Flow block that breaks to %bb9 or
     ; loops back to %bb1 on the i1 phi %my.tmp3. In this variant the value of
     ; %my.tmp3 incoming from %bb1 is the constant true, so reaching %Flow
     ; directly from %bb1 selects the break edge to %bb9. The OPT checks above
     ; expect the phi to be passed unchanged to llvm.amdgcn.if.break.
 334 bb:
 335   %id = call i32 @llvm.amdgcn.workitem.id.x()
 336   %my.tmp = sub i32 %id, %arg
 337   br label %bb1
 338
 339 bb1:                                              ; preds = %Flow, %bb
 340   %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
 341   %lsr.iv.next = add i32 %lsr.iv, 1
 342   %cmp0 = icmp slt i32 %lsr.iv.next, 0
 343   br i1 %cmp0, label %bb4, label %Flow
 344
 345 bb4:                                              ; preds = %bb1
 346   %load = load volatile i32, ptr addrspace(1) undef, align 4
 347   %cmp1 = icmp sge i32 %my.tmp, %load
 348   br label %Flow
 349
     ; Break condition: %cmp1 when coming from %bb4, true when coming from %bb1.
 350 Flow:                                             ; preds = %bb4, %bb1
 351   %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 352   %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
 353   br i1 %my.tmp3, label %bb9, label %bb1
 354
     ; Loop exit: volatile store of 7 through an undef addrspace(3) pointer.
 355 bb9:                                              ; preds = %Flow
 356   store volatile i32 7, ptr addrspace(3) undef
 357   ret void
 358 }
 359
 360 define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
 361 ; OPT-LABEL: @false_phi_cond_break_loop(
 362 ; OPT-NEXT:  bb:
 363 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 364 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
 365 ; OPT-NEXT:    br label [[BB1:%.*]]
 366 ; OPT:       bb1:
 367 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
 368 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
 369 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
 370 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
 371 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 372 ; OPT:       bb4:
 373 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 374 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 375 ; OPT-NEXT:    br label [[FLOW]]
 376 ; OPT:       Flow:
 377 ; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
 378 ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
 379 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
 380 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 381 ; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
 382 ; OPT:       bb9:
 383 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 384 ; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
 385 ; OPT-NEXT:    ret void
 386 ;
 387 ; GCN-LABEL: false_phi_cond_break_loop:
 388 ; GCN:       ; %bb.0: ; %bb
 389 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
 390 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 391 ; GCN-NEXT:    s_mov_b32 s2, -1
 392 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 393 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
 394 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 395 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 396 ; GCN-NEXT:    ; implicit-def: $sgpr6
 397 ; GCN-NEXT:  .LBB4_1: ; %bb1
 398 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 399 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 400 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
 401 ; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
 402 ; GCN-NEXT:  ; %bb.2: ; %bb4
 403 ; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
 404 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 405 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 406 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
 407 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 408 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
 409 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 410 ; GCN-NEXT:  .LBB4_3: ; %Flow
 411 ; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
 412 ; GCN-NEXT:    s_add_i32 s6, s6, 1
 413 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
 414 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
 415 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 416 ; GCN-NEXT:    s_cbranch_execnz .LBB4_1
 417 ; GCN-NEXT:  ; %bb.4: ; %bb9
 418 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 419 ; GCN-NEXT:    v_mov_b32_e32 v0, 7
 420 ; GCN-NEXT:    s_mov_b32 m0, -1
 421 ; GCN-NEXT:    ds_write_b32 v0, v0
 422 ; GCN-NEXT:    s_endpgm
     ; The input loop already has a single %Flow block that breaks to %bb9 or
     ; loops back to %bb1 on the i1 phi %my.tmp3. In this variant the value of
     ; %my.tmp3 incoming from %bb1 is the constant false, so reaching %Flow
     ; directly from %bb1 selects the back edge to %bb1. The OPT checks above
     ; expect the phi to be passed unchanged to llvm.amdgcn.if.break.
 423 bb:
 424   %id = call i32 @llvm.amdgcn.workitem.id.x()
 425   %my.tmp = sub i32 %id, %arg
 426   br label %bb1
 427
 428 bb1:                                              ; preds = %Flow, %bb
 429   %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
 430   %lsr.iv.next = add i32 %lsr.iv, 1
 431   %cmp0 = icmp slt i32 %lsr.iv.next, 0
 432   br i1 %cmp0, label %bb4, label %Flow
 433
 434 bb4:                                              ; preds = %bb1
 435   %load = load volatile i32, ptr addrspace(1) undef, align 4
 436   %cmp1 = icmp sge i32 %my.tmp, %load
 437   br label %Flow
 438
     ; Break condition: %cmp1 when coming from %bb4, false when coming from %bb1.
 439 Flow:                                             ; preds = %bb4, %bb1
 440   %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 441   %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
 442   br i1 %my.tmp3, label %bb9, label %bb1
 443
     ; Loop exit: volatile store of 7 through an undef addrspace(3) pointer.
 444 bb9:                                              ; preds = %Flow
 445   store volatile i32 7, ptr addrspace(3) undef
 446   ret void
 447 }
 448
 449 ; Swap the order of the branch targets in the Flow block so that a true phi
 450 ; value means continue (loop back) instead of break.
 451
 452 define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
 453 ; OPT-LABEL: @invert_true_phi_cond_break_loop(
 454 ; OPT-NEXT:  bb:
 455 ; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 456 ; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
 457 ; OPT-NEXT:    br label [[BB1:%.*]]
 458 ; OPT:       bb1:
 459 ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
 460 ; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
 461 ; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
 462 ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
 463 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 464 ; OPT:       bb4:
 465 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 466 ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 467 ; OPT-NEXT:    br label [[FLOW]]
 468 ; OPT:       Flow:
 469 ; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
 470 ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
 471 ; OPT-NEXT:    [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
 472 ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
 473 ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 474 ; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
 475 ; OPT:       bb9:
 476 ; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 477 ; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
 478 ; OPT-NEXT:    ret void
 479 ;
 480 ; GCN-LABEL: invert_true_phi_cond_break_loop:
 481 ; GCN:       ; %bb.0: ; %bb
 482 ; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
 483 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 484 ; GCN-NEXT:    s_mov_b32 s2, -1
 485 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 486 ; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
 487 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 488 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 489 ; GCN-NEXT:    ; implicit-def: $sgpr6
 490 ; GCN-NEXT:  .LBB5_1: ; %bb1
 491 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 492 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 493 ; GCN-NEXT:    s_cmp_gt_i32 s6, -1
 494 ; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
 495 ; GCN-NEXT:  ; %bb.2: ; %bb4
 496 ; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 497 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 498 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 499 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
 500 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 501 ; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
 502 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 503 ; GCN-NEXT:  .LBB5_3: ; %Flow
 504 ; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 505 ; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
 506 ; GCN-NEXT:    s_add_i32 s6, s6, 1
 507 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
 508 ; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
 509 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 510 ; GCN-NEXT:    s_cbranch_execnz .LBB5_1
 511 ; GCN-NEXT:  ; %bb.4: ; %bb9
 512 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 513 ; GCN-NEXT:    v_mov_b32_e32 v0, 7
 514 ; GCN-NEXT:    s_mov_b32 m0, -1
 515 ; GCN-NEXT:    ds_write_b32 v0, v0
 516 ; GCN-NEXT:    s_endpgm
     ; Same loop shape and phi operands as @true_phi_cond_break_loop, but the
     ; branch targets in %Flow are swapped: a true %my.tmp3 loops back to %bb1
     ; and a false one exits to %bb9. The OPT checks above expect an extra
     ; xor with true (MY_TMP3_INV) feeding llvm.amdgcn.if.break, and the GCN
     ; checks expect a corresponding s_xor_b64 with -1 in the Flow block.
 517 bb:
 518   %id = call i32 @llvm.amdgcn.workitem.id.x()
 519   %my.tmp = sub i32 %id, %arg
 520   br label %bb1
 521
 522 bb1:                                              ; preds = %Flow, %bb
 523   %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
 524   %lsr.iv.next = add i32 %lsr.iv, 1
 525   %cmp0 = icmp slt i32 %lsr.iv.next, 0
 526   br i1 %cmp0, label %bb4, label %Flow
 527
 528 bb4:                                              ; preds = %bb1
 529   %load = load volatile i32, ptr addrspace(1) undef, align 4
 530   %cmp1 = icmp sge i32 %my.tmp, %load
 531   br label %Flow
 532
     ; Continue condition (not break): true takes the back edge to %bb1.
 533 Flow:                                             ; preds = %bb4, %bb1
 534   %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 535   %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
 536   br i1 %my.tmp3, label %bb1, label %bb9
 537
     ; Loop exit: volatile store of 7 through an undef addrspace(3) pointer.
 538 bb9:                                              ; preds = %Flow
 539   store volatile i32 7, ptr addrspace(3) undef
 540   ret void
 541 }
 542
 543 declare i32 @llvm.amdgcn.workitem.id.x() #1
 544
     ; Attribute group #0 is attached to the kernel definitions above; #1 is
     ; attached to the llvm.amdgcn.workitem.id.x declaration, which each kernel
     ; calls in its entry block to produce %id.
 545 attributes #0 = { nounwind }
 546 attributes #1 = { nounwind readnone }