; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
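
; Check that the exec-mask restores (s_or_b64 exec, exec, ...) that close
; structured control-flow regions are combined when nested "if" regions share
; an exit, so redundant endcf instructions are removed.

; Simple nested if: the inner if needs no saveexec/endcf of its own. Both
; conditional branches jump to the one [[ENDIF]] block, which restores exec
; with a single s_or_b64.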
; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}
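
; Here the endifs cannot be collapsed: bb.inner.end stores between the two
; exec restores, so both the inner and the outer s_or_b64 must remain.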
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}
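
; An if whose then-block contains an if/else: the inner else is entered
; through the [[THEN_INNER]] flow block (s_or_saveexec + s_xor), and the
; outer endif keeps its own s_or_b64 restore.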
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  store i32 3, i32 addrspace(3)* null
  ret void
}
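
; An if/else whose branches each contain another if: the outer else is
; reached through the [[THEN_OUTER]] flow block, and each inner if gets its
; own saveexec/restore pair.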
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: [[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:                                    ; preds = %bb
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:                                   ; preds = %bb.outer.else
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.then2, %bb.outer.else, %bb.inner.then, %bb.outer.then
  store i32 3, i32 addrspace(3)* null
  ret void
}
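
; The s_or_b64 exec restore must stay ahead of the s_barrier: removing the
; endcf before a barrier would be unsafe, even though the kernel ends right
; after it.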
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN: s_endpgm

define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}
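
; A loop nest with divergent control flow inside. Checks that the redundant
; inner s_or_b64 restore is removed (see the GCN-NOT below) without
; disturbing the loop exec masks or the SCC liveness the test is named for.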
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]

; GCN: [[BB1_INNER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen

; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword

define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }