; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
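;
; In each test, si-annotate-control-flow rewrites the divergent loop exit
; into the if.break/loop/end.cf intrinsic pattern checked below. A rough
; sketch of the transformed loop shape (block and value names here are
; illustrative, not the exact output):
;
;   loop:
;     %phi.broken = phi i64 [ %broken, %flow ], [ 0, %entry ]
;     ...
;   flow:
;     ; OR lanes that want to exit into the accumulated break mask
;     %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %exit.cond, i64 %phi.broken)
;     ; returns true once every active lane has broken out of the loop
;     %done = call i1 @llvm.amdgcn.loop.i64(i64 %broken)
;     br i1 %done, label %exit, label %loop
;   exit:
;     ; re-enable the lanes that exited on earlier iterations
;     call void @llvm.amdgcn.end.cf.i64(i64 %broken)
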
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: ret void
;
; GCN-LABEL: break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB0_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB0_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9: ; preds = %bb4, %bb1
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB1_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB1_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB1_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB1_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
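; A null compare such as
;   icmp ne (i32 addrspace(3)* @lds, i32 addrspace(3)* null)
; would presumably constant-fold before reaching the annotator, so the test
; below compares against a non-null constant address (inttoptr of 4) instead.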
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB2_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB2_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB2_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB3_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB3_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB3_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB3_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB4_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB4_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB4_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB4_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.
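;
; Because the i1 phi now means "continue" rather than "break", the annotator
; has to invert it before feeding it to if.break; this shows up as the
; xor with true in the OPT checks and the s_xor_b64 in the GCN output below.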
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT: br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB5_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB5_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB5_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB5_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }