llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
   3 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s
   4
   5 define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
   6 ; SI-LABEL: break_inserted_outside_of_loop:
   7 ; SI:       ; %bb.0: ; %main_body
   8 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
   9 ; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
  10 ; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
  11 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
  12 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
  13 ; SI-NEXT:    v_and_b32_e32 v0, 1, v0
  14 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
  15 ; SI-NEXT:    s_mov_b64 s[0:1], 0
  16 ; SI-NEXT:  BB0_1: ; %ENDIF
  17 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
  18 ; SI-NEXT:    s_and_b64 s[2:3], exec, vcc
  19 ; SI-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
  20 ; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
  21 ; SI-NEXT:    s_cbranch_execnz BB0_1
  22 ; SI-NEXT:  ; %bb.2: ; %ENDLOOP
  23 ; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
  24 ; SI-NEXT:    s_mov_b32 s7, 0xf000
  25 ; SI-NEXT:    s_mov_b32 s6, -1
  26 ; SI-NEXT:    v_mov_b32_e32 v0, 0
  27 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  28 ; SI-NEXT:    s_endpgm
  29 ;
  30 ; FLAT-LABEL: break_inserted_outside_of_loop:
  31 ; FLAT:       ; %bb.0: ; %main_body
  32 ; FLAT-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
  33 ; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x2c
  34 ; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
  35 ; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
  36 ; FLAT-NEXT:    v_and_b32_e32 v0, s0, v0
  37 ; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
  38 ; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
  39 ; FLAT-NEXT:    s_mov_b64 s[0:1], 0
  40 ; FLAT-NEXT:  BB0_1: ; %ENDIF
  41 ; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
  42 ; FLAT-NEXT:    s_and_b64 s[2:3], exec, vcc
  43 ; FLAT-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
  44 ; FLAT-NEXT:    s_andn2_b64 exec, exec, s[0:1]
  45 ; FLAT-NEXT:    s_cbranch_execnz BB0_1
  46 ; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
  47 ; FLAT-NEXT:    s_or_b64 exec, exec, s[0:1]
  48 ; FLAT-NEXT:    s_mov_b32 s7, 0xf000
  49 ; FLAT-NEXT:    s_mov_b32 s6, -1
  50 ; FLAT-NEXT:    v_mov_b32_e32 v0, 0
  51 ; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  52 ; FLAT-NEXT:    s_endpgm
     ; %ENDIF is a single-block loop whose exit condition %1 is computed once in
     ; %main_body, i.e. before the loop is entered. The checks above expect the
     ; compare to stay ahead of the loop label, the loop itself to consist only of
     ; exec-mask updates (s_and_b64 / s_or_b64 / s_andn2_b64 exec) ending in
     ; s_cbranch_execnz, and exec to be restored with s_or_b64 in %ENDLOOP before
     ; the store.
  53 main_body:
       ; NOTE(review): mbcnt.lo presumably makes the condition differ per lane -
       ; confirm against the pass this test targets.
  54   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
       ; Bit 0 of (%a & %tid) is the loop-exit condition.
  55   %0 = and i32 %a, %tid
  56   %1 = trunc i32 %0 to i1
  57   br label %ENDIF
  58
       ; Loop exit: store 0 through %out and return.
  59 ENDLOOP:
  60   store i32 0, i32 addrspace(1)* %out
  61   ret void
  62
       ; Loop body: branches back to itself until %1 is true.
  63 ENDIF:
  64   br i1 %1, label %ENDLOOP, label %ENDIF
  65 }
  66
  67 define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
  68 ; SI-LABEL: phi_cond_outside_loop:
  69 ; SI:       ; %bb.0: ; %entry
  70 ; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
  71 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  72 ; SI-NEXT:    s_mov_b64 s[2:3], 0
  73 ; SI-NEXT:    s_mov_b64 s[4:5], 0
  74 ; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
  75 ; SI-NEXT:    s_cbranch_execz BB1_2
  76 ; SI-NEXT:  ; %bb.1: ; %else
  77 ; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
  78 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
  79 ; SI-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
  80 ; SI-NEXT:    s_and_b64 s[4:5], s[0:1], exec
  81 ; SI-NEXT:  BB1_2: ; %endif
  82 ; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
  83 ; SI-NEXT:  BB1_3: ; %loop
  84 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
  85 ; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
  86 ; SI-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
  87 ; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
  88 ; SI-NEXT:    s_cbranch_execnz BB1_3
  89 ; SI-NEXT:  ; %bb.4: ; %exit
  90 ; SI-NEXT:    s_endpgm
  91 ;
  92 ; FLAT-LABEL: phi_cond_outside_loop:
  93 ; FLAT:       ; %bb.0: ; %entry
  94 ; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
  95 ; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  96 ; FLAT-NEXT:    s_mov_b64 s[2:3], 0
  97 ; FLAT-NEXT:    s_mov_b64 s[4:5], 0
  98 ; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
  99 ; FLAT-NEXT:    s_cbranch_execz BB1_2
 100 ; FLAT-NEXT:  ; %bb.1: ; %else
 101 ; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x24
 102 ; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
 103 ; FLAT-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
 104 ; FLAT-NEXT:    s_and_b64 s[4:5], s[0:1], exec
 105 ; FLAT-NEXT:  BB1_2: ; %endif
 106 ; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
 107 ; FLAT-NEXT:  BB1_3: ; %loop
 108 ; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
 109 ; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
 110 ; FLAT-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
 111 ; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
 112 ; FLAT-NEXT:    s_cbranch_execnz BB1_3
 113 ; FLAT-NEXT:  ; %bb.4: ; %exit
 114 ; FLAT-NEXT:    s_endpgm
     ; The exit condition of %loop is the phi %2, which is defined in %endif,
     ; before the loop. In the checks above s[4:5] is zeroed in the entry block
     ; and only overwritten in %else, and the loop then combines it with exec on
     ; every iteration.
 115 entry:
 116   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
 117   %0 = icmp eq i32 %tid , 0
 118   br i1 %0, label %if, label %else
 119
       ; Taken when %tid == 0; contributes the constant 0 (false) to the phi.
 120 if:
 121   br label %endif
 122
       ; Taken when %tid != 0; contributes (%b == 0) to the phi.
 123 else:
 124   %1 = icmp eq i32 %b, 0
 125   br label %endif
 126
 127 endif:
 128   %2 = phi i1 [0, %if], [%1, %else]
 129   br label %loop
 130
       ; Self-loop: repeats until %2 is true. %2 never changes inside the loop.
 131 loop:
 132   br i1 %2, label %exit, label %loop
 133
 134 exit:
 135   ret void
 136 }
 137
 138 define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
 139 ; SI-LABEL: switch_unreachable:
 140 ; SI:       ; %bb.0: ; %centry
 141 ;
 142 ; FLAT-LABEL: switch_unreachable:
 143 ; FLAT:       ; %bb.0: ; %centry
     ; Every destination of the switch ends in unreachable, and no branch in this
     ; function targets %sw.epilog. The checks above only pin the function label
     ; and the entry-block comment; no instructions are checked. %g and %l are
     ; not referenced in the body.
 144 centry:
 145   switch i32 %x, label %sw.default [
 146     i32 0, label %sw.bb
 147     i32 60, label %sw.bb
 148   ]
 149
 150 sw.bb:
 151   unreachable
 152
 153 sw.default:
 154   unreachable
 155
       ; No predecessors in this function.
 156 sw.epilog:
 157   ret void
 158 }
 159
 160 declare float @llvm.fabs.f32(float) nounwind readnone
 161
 162 define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
 163 ; SI-LABEL: loop_land_info_assert:
 164 ; SI:       ; %bb.0: ; %entry
 165 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 166 ; SI-NEXT:    s_mov_b32 s6, -1
 167 ; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
 168 ; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
 169 ; SI-NEXT:    s_load_dword s14, s[0:1], 0xc
 170 ; SI-NEXT:    s_brev_b32 s8, 44
 171 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 172 ; SI-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
 173 ; SI-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
 174 ; SI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
 175 ; SI-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
 176 ; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
 177 ; SI-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
 178 ; SI-NEXT:    s_waitcnt vmcnt(0)
 179 ; SI-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s8
 180 ; SI-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
 181 ; SI-NEXT:    v_mov_b32_e32 v0, 3
 182 ; SI-NEXT:    s_branch BB3_4
 183 ; SI-NEXT:  BB3_1: ; %Flow6
 184 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 185 ; SI-NEXT:    s_mov_b64 s[8:9], 0
 186 ; SI-NEXT:  BB3_2: ; %Flow5
 187 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 188 ; SI-NEXT:    s_mov_b64 s[12:13], 0
 189 ; SI-NEXT:  BB3_3: ; %Flow
 190 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 191 ; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
 192 ; SI-NEXT:    s_cbranch_vccnz BB3_8
 193 ; SI-NEXT:  BB3_4: ; %while.cond
 194 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 195 ; SI-NEXT:    s_mov_b64 s[12:13], -1
 196 ; SI-NEXT:    s_mov_b64 s[8:9], -1
 197 ; SI-NEXT:    s_mov_b64 s[10:11], -1
 198 ; SI-NEXT:    s_mov_b64 vcc, s[0:1]
 199 ; SI-NEXT:    s_cbranch_vccz BB3_3
 200 ; SI-NEXT:  ; %bb.5: ; %convex.exit
 201 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 202 ; SI-NEXT:    s_mov_b64 s[8:9], -1
 203 ; SI-NEXT:    s_mov_b64 s[10:11], -1
 204 ; SI-NEXT:    s_mov_b64 vcc, s[2:3]
 205 ; SI-NEXT:    s_cbranch_vccz BB3_2
 206 ; SI-NEXT:  ; %bb.6: ; %if.end
 207 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 208 ; SI-NEXT:    s_mov_b64 s[10:11], -1
 209 ; SI-NEXT:    s_mov_b64 vcc, s[4:5]
 210 ; SI-NEXT:    s_cbranch_vccz BB3_1
 211 ; SI-NEXT:  ; %bb.7: ; %if.else
 212 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 213 ; SI-NEXT:    s_mov_b64 s[10:11], 0
 214 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 215 ; SI-NEXT:    s_waitcnt vmcnt(0)
 216 ; SI-NEXT:    s_branch BB3_1
 217 ; SI-NEXT:  BB3_8: ; %loop.exit.guard4
 218 ; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 219 ; SI-NEXT:    s_and_b64 vcc, exec, s[8:9]
 220 ; SI-NEXT:    s_cbranch_vccz BB3_4
 221 ; SI-NEXT:  ; %bb.9: ; %loop.exit.guard
 222 ; SI-NEXT:    s_and_b64 vcc, exec, s[12:13]
 223 ; SI-NEXT:    s_cbranch_vccz BB3_13
 224 ; SI-NEXT:  ; %bb.10: ; %for.cond.preheader
 225 ; SI-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
 226 ; SI-NEXT:    s_cbranch_scc0 BB3_13
 227 ; SI-NEXT:  ; %bb.11: ; %for.body
 228 ; SI-NEXT:    s_and_b64 vcc, exec, 0
 229 ; SI-NEXT:  BB3_12: ; %self.loop
 230 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 231 ; SI-NEXT:    s_mov_b64 vcc, vcc
 232 ; SI-NEXT:    s_cbranch_vccz BB3_12
 233 ; SI-NEXT:  BB3_13: ; %DummyReturnBlock
 234 ; SI-NEXT:    s_endpgm
 235 ;
 236 ; FLAT-LABEL: loop_land_info_assert:
 237 ; FLAT:       ; %bb.0: ; %entry
 238 ; FLAT-NEXT:    s_mov_b32 s7, 0xf000
 239 ; FLAT-NEXT:    s_mov_b32 s6, -1
 240 ; FLAT-NEXT:    buffer_load_dword v0, off, s[4:7], 0
 241 ; FLAT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 242 ; FLAT-NEXT:    s_load_dword s14, s[0:1], 0x30
 243 ; FLAT-NEXT:    s_brev_b32 s8, 44
 244 ; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
 245 ; FLAT-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
 246 ; FLAT-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
 247 ; FLAT-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
 248 ; FLAT-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
 249 ; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
 250 ; FLAT-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
 251 ; FLAT-NEXT:    s_waitcnt vmcnt(0)
 252 ; FLAT-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s8
 253 ; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
 254 ; FLAT-NEXT:    v_mov_b32_e32 v0, 3
 255 ; FLAT-NEXT:    s_branch BB3_4
 256 ; FLAT-NEXT:  BB3_1: ; %Flow6
 257 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 258 ; FLAT-NEXT:    s_mov_b64 s[8:9], 0
 259 ; FLAT-NEXT:  BB3_2: ; %Flow5
 260 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 261 ; FLAT-NEXT:    s_mov_b64 s[12:13], 0
 262 ; FLAT-NEXT:  BB3_3: ; %Flow
 263 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 264 ; FLAT-NEXT:    s_and_b64 vcc, exec, s[10:11]
 265 ; FLAT-NEXT:    s_cbranch_vccnz BB3_8
 266 ; FLAT-NEXT:  BB3_4: ; %while.cond
 267 ; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
 268 ; FLAT-NEXT:    s_mov_b64 s[12:13], -1
 269 ; FLAT-NEXT:    s_mov_b64 s[8:9], -1
 270 ; FLAT-NEXT:    s_mov_b64 s[10:11], -1
 271 ; FLAT-NEXT:    s_mov_b64 vcc, s[0:1]
 272 ; FLAT-NEXT:    s_cbranch_vccz BB3_3
 273 ; FLAT-NEXT:  ; %bb.5: ; %convex.exit
 274 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 275 ; FLAT-NEXT:    s_mov_b64 s[8:9], -1
 276 ; FLAT-NEXT:    s_mov_b64 s[10:11], -1
 277 ; FLAT-NEXT:    s_mov_b64 vcc, s[2:3]
 278 ; FLAT-NEXT:    s_cbranch_vccz BB3_2
 279 ; FLAT-NEXT:  ; %bb.6: ; %if.end
 280 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 281 ; FLAT-NEXT:    s_mov_b64 s[10:11], -1
 282 ; FLAT-NEXT:    s_mov_b64 vcc, s[4:5]
 283 ; FLAT-NEXT:    s_cbranch_vccz BB3_1
 284 ; FLAT-NEXT:  ; %bb.7: ; %if.else
 285 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 286 ; FLAT-NEXT:    s_mov_b64 s[10:11], 0
 287 ; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 288 ; FLAT-NEXT:    s_waitcnt vmcnt(0)
 289 ; FLAT-NEXT:    s_branch BB3_1
 290 ; FLAT-NEXT:  BB3_8: ; %loop.exit.guard4
 291 ; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
 292 ; FLAT-NEXT:    s_and_b64 vcc, exec, s[8:9]
 293 ; FLAT-NEXT:    s_cbranch_vccz BB3_4
 294 ; FLAT-NEXT:  ; %bb.9: ; %loop.exit.guard
 295 ; FLAT-NEXT:    s_and_b64 vcc, exec, s[12:13]
 296 ; FLAT-NEXT:    s_cbranch_vccz BB3_13
 297 ; FLAT-NEXT:  ; %bb.10: ; %for.cond.preheader
 298 ; FLAT-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
 299 ; FLAT-NEXT:    s_cbranch_scc0 BB3_13
 300 ; FLAT-NEXT:  ; %bb.11: ; %for.body
 301 ; FLAT-NEXT:    s_and_b64 vcc, exec, 0
 302 ; FLAT-NEXT:  BB3_12: ; %self.loop
 303 ; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
 304 ; FLAT-NEXT:    s_mov_b64 vcc, vcc
 305 ; FLAT-NEXT:    s_cbranch_vccz BB3_12
 306 ; FLAT-NEXT:  BB3_13: ; %DummyReturnBlock
 307 ; FLAT-NEXT:    s_endpgm
     ; Control flow: a two-level loop (%while.cond.outer / %while.cond) with
     ; several exits, a second cycle through %for.cond / %for.body / %if.end.2,
     ; and an endless %self.loop. All branch conditions depend only on kernel
     ; arguments, apart from %cmp2, which uses a value loaded from an undef
     ; address. %c2, %x and %y are not referenced in the body.
     ; NOTE(review): the function name suggests a regression test for an
     ; assertion in loop handling - confirm against the commit that added it.
 308 entry:
       ; Computed once, ahead of both loops; used only in %convex.exit.
 309   %cmp = icmp sgt i32 %c0, 0
 310   br label %while.cond.outer
 311
 312 while.cond.outer:
       ; Load through an undef pointer; the value only feeds the fabs/fcmp in
       ; %if.end.
 313   %tmp = load float, float addrspace(1)* undef
 314   br label %while.cond
 315
 316 while.cond:
 317   %cmp1 = icmp slt i32 %c1, 4
 318   br i1 %cmp1, label %convex.exit, label %for.cond
 319
 320 convex.exit:
       ; Reached only when %cmp1 is true, so %or is true whenever this block
       ; executes.
 321   %or = or i1 %cmp, %cmp1
 322   br i1 %or, label %return, label %if.end
 323
 324 if.end:
 325   %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
       ; 0x3E80000000000000 is 2^-23. The checks above materialize it with
       ; s_brev_b32 s8, 44 (bit-reversed 44 = 0x34000000, the f32 encoding of
       ; 2^-23).
 326   %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
 327   br i1 %cmp2, label %if.else, label %while.cond.outer
 328
 329 if.else:
       ; Volatile store of 3 through an undef pointer, then back to the inner
       ; loop header.
 330   store volatile i32 3, i32 addrspace(1)* undef, align 4
 331   br label %while.cond
 332
 333 for.cond:
       ; 1000 appears as the immediate 0x3e8 of s_cmpk_lt_i32 in the checks above.
 334   %cmp3 = icmp slt i32 %c3, 1000
 335   br i1 %cmp3, label %for.body, label %return
 336
 337 for.body:
       ; The only edge into this block is taken when %cmp3 is true, so this
       ; branch always goes to %self.loop; no %if.end.2 block appears in the
       ; checked output.
 338   br i1 %cmp3, label %self.loop, label %if.end.2
 339
 340 if.end.2:
 341   %or.cond2 = or i1 %cmp3, %arg
 342   br i1 %or.cond2, label %return, label %for.cond
 343
       ; Unconditional self-branch: there is no exit from this block.
 344 self.loop:
 345  br label %self.loop
 346
 347 return:
 348   ret void
 349 }
 350
 351 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
 352
 353 attributes #0 = { nounwind readnone }