test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll

   1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
     ; The three invocations above compile this file for verde, tonga and gfx1010
     ; (the latter with wavefrontsize32 disabled and wavefrontsize64 enabled).
     ; Checks under the GCN prefix apply to every run; the SI prefix is used by the
     ; verde and tonga runs, and the GFX10 prefix by the gfx1010 run.
   4
   5 ; GCN-LABEL: {{^}}gs_const:
   6 ; GCN-NOT: v_cmpx
   7 ; GCN: s_mov_b64 exec, 0
     ; Both kill conditions are built purely from constants. The first evaluates to
     ; true (0 <= 3 selects 1.0, and 1.0 >= 0.0); the second evaluates to false
     ; (3 <= 0 is false, selecting -1.0). The checks expect no v_cmpx to be emitted
     ; and exec to be written with 0.
   8 define amdgpu_gs void @gs_const() {
   9   %in.range = icmp ule i32 0, 3
  10   %keep.val = select i1 %in.range, float 1.000000e+00, float -1.000000e+00
  11   %keep = fcmp oge float %keep.val, 0.0
  12   call void @llvm.amdgcn.kill(i1 %keep)
  13   %out.of.range = icmp ule i32 3, 0
  14   %drop.val = select i1 %out.of.range, float 1.000000e+00, float -1.000000e+00
  15   %drop = fcmp oge float %drop.val, 0.0
  16   call void @llvm.amdgcn.kill(i1 %drop)
  17   ret void
  18 }
  19
  20 ; GCN-LABEL: {{^}}vcc_implicit_def:
  21 ; GCN-NOT: v_cmp_gt_f32_e32 vcc,
  22 ; GCN: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
  23 ; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
  24 ; GFX10: v_cmpx_le_f32_e32 0, v{{[0-9]+}}
  25 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
     ; An olt compare is computed before the kill and its result is consumed by a
     ; select after the kill. The checks expect that compare in its e64 form,
     ; writing an SGPR pair (captured as CMP) instead of vcc, then a v_cmpx_le_f32
     ; (which names vcc as an operand on the runs using the SI prefix), and finally
     ; a v_cndmask that reads the captured SGPR pair.
  26 define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
  27   %is.neg = fcmp olt float %arg13, 0.000000e+00
  28   %keep = fcmp oge float %arg14, 0.0
  29   call void @llvm.amdgcn.kill(i1 %keep)
  30   %color = select i1 %is.neg, float 1.000000e+00, float 0.000000e+00
  31   call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %color, float %color, float %color, float %color, i1 true, i1 true) #0
  32   ret void
  33 }
  34
  35 ; GCN-LABEL: {{^}}true:
  36 ; GCN-NEXT: %bb.
  37 ; GCN-NEXT: %bb.
  38 ; GCN-NEXT: s_endpgm
     ; Kill with a constant true condition. The checks expect the label to be
     ; followed directly by two %bb. lines and then s_endpgm, with no other output
     ; lines in between.
  39 define amdgpu_gs void @true() {
  40   call void @llvm.amdgcn.kill(i1 true)
  41   ret void
  42 }
  43
  44 ; GCN-LABEL: {{^}}false:
  45 ; GCN-NOT: v_cmpx
  46 ; GCN: s_mov_b64 exec, 0
     ; Kill with a constant false condition. The checks expect exec to be written
     ; with 0 and no v_cmpx to appear before that write.
  47 define amdgpu_gs void @false() {
  48   call void @llvm.amdgcn.kill(i1 false)
  49   ret void
  50 }
  51
  52 ; GCN-LABEL: {{^}}and:
  53 ; GCN: v_cmp_lt_i32
  54 ; GCN: v_cmp_lt_i32
  55 ; GCN: s_or_b64 s[0:1]
  56 ; GCN: s_and_b64 exec, exec, s[0:1]
     ; The kill condition is the OR of two signed less-than compares. The checks
     ; expect two v_cmp_lt_i32, an s_or_b64 combining them into s[0:1], and that
     ; result applied to exec with s_and_b64 (presumably the source of the test
     ; name, since the IR itself contains no 'and').
  57 define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
  58   %a.lt.b = icmp slt i32 %a, %b
  59   %c.lt.d = icmp slt i32 %c, %d
  60   %either = or i1 %a.lt.b, %c.lt.d
  61   call void @llvm.amdgcn.kill(i1 %either)
  62   ret void
  63 }
  64
  65 ; GCN-LABEL: {{^}}andn2:
  66 ; GCN: v_cmp_lt_i32
  67 ; GCN: v_cmp_lt_i32
  68 ; GCN: s_xor_b64 s[0:1]
  69 ; GCN: s_andn2_b64 exec, exec, s[0:1]
     ; The kill condition is the negation (xor with 1) of the xor of two signed
     ; less-than compares. The checks expect the un-negated xor result in s[0:1] to
     ; be applied to exec with s_andn2_b64.
  70 define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
  71   %a.lt.b = icmp slt i32 %a, %b
  72   %c.lt.d = icmp slt i32 %c, %d
  73   %differ = xor i1 %a.lt.b, %c.lt.d
  74   %same = xor i1 %differ, 1
  75   call void @llvm.amdgcn.kill(i1 %same)
  76   ret void
  77 }
  78
  79 ; GCN-LABEL: {{^}}oeq:
  80 ; GCN: v_cmpx_eq_f32
  81 ; GCN-NOT: s_and
     ; Kill on an ordered-equal compare against 0.0. The checks expect
     ; v_cmpx_eq_f32 with no s_and following it.
  82 define amdgpu_gs void @oeq(float %a) {
  83   %is.oeq = fcmp oeq float %a, 0.0
  84   call void @llvm.amdgcn.kill(i1 %is.oeq)
  85   ret void
  86 }
  87
  88 ; GCN-LABEL: {{^}}ogt:
  89 ; GCN: v_cmpx_lt_f32
  90 ; GCN-NOT: s_and
     ; Kill on an ordered greater-than compare against 0.0. The checks expect the
     ; operand-swapped opcode v_cmpx_lt_f32 with no s_and following it.
  91 define amdgpu_gs void @ogt(float %a) {
  92   %is.ogt = fcmp ogt float %a, 0.0
  93   call void @llvm.amdgcn.kill(i1 %is.ogt)
  94   ret void
  95 }
  96
  97 ; GCN-LABEL: {{^}}oge:
  98 ; GCN: v_cmpx_le_f32
  99 ; GCN-NOT: s_and
     ; Kill on an ordered greater-or-equal compare against 0.0. The checks expect
     ; the operand-swapped opcode v_cmpx_le_f32 with no s_and following it.
 100 define amdgpu_gs void @oge(float %a) {
 101   %is.oge = fcmp oge float %a, 0.0
 102   call void @llvm.amdgcn.kill(i1 %is.oge)
 103   ret void
 104 }
 105
 106 ; GCN-LABEL: {{^}}olt:
 107 ; GCN: v_cmpx_gt_f32
 108 ; GCN-NOT: s_and
     ; Kill on an ordered less-than compare against 0.0. The checks expect the
     ; operand-swapped opcode v_cmpx_gt_f32 with no s_and following it.
 109 define amdgpu_gs void @olt(float %a) {
 110   %is.olt = fcmp olt float %a, 0.0
 111   call void @llvm.amdgcn.kill(i1 %is.olt)
 112   ret void
 113 }
 114
 115 ; GCN-LABEL: {{^}}ole:
 116 ; GCN: v_cmpx_ge_f32
 117 ; GCN-NOT: s_and
     ; Kill on an ordered less-or-equal compare against 0.0. The checks expect the
     ; operand-swapped opcode v_cmpx_ge_f32 with no s_and following it.
 118 define amdgpu_gs void @ole(float %a) {
 119   %is.ole = fcmp ole float %a, 0.0
 120   call void @llvm.amdgcn.kill(i1 %is.ole)
 121   ret void
 122 }
 123
 124 ; GCN-LABEL: {{^}}one:
 125 ; GCN: v_cmpx_lg_f32
 126 ; GCN-NOT: s_and
     ; Kill on an ordered not-equal compare against 0.0. The checks expect
     ; v_cmpx_lg_f32 with no s_and following it.
 127 define amdgpu_gs void @one(float %a) {
 128   %is.one = fcmp one float %a, 0.0
 129   call void @llvm.amdgcn.kill(i1 %is.one)
 130   ret void
 131 }
 132
 133 ; GCN-LABEL: {{^}}ord:
 134 ; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
 135 ; GCN: v_cmp_o_f32
     ; Kill on an 'ord' compare against 0.0. Unlike the neighbouring predicate
     ; tests, the check here expects the plain v_cmp_o_f32 rather than a v_cmpx
     ; opcode (see the FIXME above).
 136 define amdgpu_gs void @ord(float %a) {
 137   %is.ord = fcmp ord float %a, 0.0
 138   call void @llvm.amdgcn.kill(i1 %is.ord)
 139   ret void
 140 }
 141
 142 ; GCN-LABEL: {{^}}uno:
 143 ; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
 144 ; GCN: v_cmp_u_f32
     ; Kill on a 'uno' compare against 0.0. Unlike the neighbouring predicate
     ; tests, the check here expects the plain v_cmp_u_f32 rather than a v_cmpx
     ; opcode (see the FIXME above).
 145 define amdgpu_gs void @uno(float %a) {
 146   %is.uno = fcmp uno float %a, 0.0
 147   call void @llvm.amdgcn.kill(i1 %is.uno)
 148   ret void
 149 }
 150
 151 ; GCN-LABEL: {{^}}ueq:
 152 ; GCN: v_cmpx_nlg_f32
 153 ; GCN-NOT: s_and
     ; Kill on an unordered-or-equal compare against 0.0. The checks expect
     ; v_cmpx_nlg_f32 with no s_and following it.
 154 define amdgpu_gs void @ueq(float %a) {
 155   %is.ueq = fcmp ueq float %a, 0.0
 156   call void @llvm.amdgcn.kill(i1 %is.ueq)
 157   ret void
 158 }
 159
 160 ; GCN-LABEL: {{^}}ugt:
 161 ; GCN: v_cmpx_nge_f32
 162 ; GCN-NOT: s_and
     ; Kill on an unordered-or-greater-than compare against 0.0. The checks expect
     ; v_cmpx_nge_f32 with no s_and following it.
 163 define amdgpu_gs void @ugt(float %a) {
 164   %is.ugt = fcmp ugt float %a, 0.0
 165   call void @llvm.amdgcn.kill(i1 %is.ugt)
 166   ret void
 167 }
 168
 169 ; GCN-LABEL: {{^}}uge:
 170 ; SI: v_cmpx_ngt_f32_e32 vcc, -1.0
 171 ; GFX10: v_cmpx_ngt_f32_e32 -1.0
 172 ; GCN-NOT: s_and
     ; Kill on an unordered-or-greater-or-equal compare against -1.0. The checks
     ; expect v_cmpx_ngt_f32_e32 with the constant as the first listed source
     ; operand; the runs using the SI prefix additionally expect vcc to be named.
     ; No s_and may follow.
 173 define amdgpu_gs void @uge(float %a) {
 174   %is.uge = fcmp uge float %a, -1.0
 175   call void @llvm.amdgcn.kill(i1 %is.uge)
 176   ret void
 177 }
 178
 179 ; GCN-LABEL: {{^}}ult:
 180 ; SI: v_cmpx_nle_f32_e32 vcc, -2.0
 181 ; GFX10: v_cmpx_nle_f32_e32 -2.0
 182 ; GCN-NOT: s_and
     ; Kill on an unordered-or-less-than compare against -2.0. The checks expect
     ; v_cmpx_nle_f32_e32 with the constant as the first listed source operand; the
     ; runs using the SI prefix additionally expect vcc to be named. No s_and may
     ; follow.
 183 define amdgpu_gs void @ult(float %a) {
 184   %is.ult = fcmp ult float %a, -2.0
 185   call void @llvm.amdgcn.kill(i1 %is.ult)
 186   ret void
 187 }
 188
 189 ; GCN-LABEL: {{^}}ule:
 190 ; SI: v_cmpx_nlt_f32_e32 vcc, 2.0
 191 ; GFX10: v_cmpx_nlt_f32_e32 2.0
 192 ; GCN-NOT: s_and
     ; Kill on an unordered-or-less-or-equal compare against 2.0. The checks expect
     ; v_cmpx_nlt_f32_e32 with the constant as the first listed source operand; the
     ; runs using the SI prefix additionally expect vcc to be named. No s_and may
     ; follow.
 193 define amdgpu_gs void @ule(float %a) {
 194   %is.ule = fcmp ule float %a, 2.0
 195   call void @llvm.amdgcn.kill(i1 %is.ule)
 196   ret void
 197 }
 198
 199 ; GCN-LABEL: {{^}}une:
 200 ; SI: v_cmpx_neq_f32_e32 vcc, 0
 201 ; GFX10: v_cmpx_neq_f32_e32 0
 202 ; GCN-NOT: s_and
     ; Kill on an unordered-or-not-equal compare against 0.0. The checks expect
     ; v_cmpx_neq_f32_e32 with 0 as the first listed source operand; the runs using
     ; the SI prefix additionally expect vcc to be named. No s_and may follow.
 203 define amdgpu_gs void @une(float %a) {
 204   %is.une = fcmp une float %a, 0.0
 205   call void @llvm.amdgcn.kill(i1 %is.une)
 206   ret void
 207 }
 208
 209 ; GCN-LABEL: {{^}}neg_olt:
 210 ; SI: v_cmpx_ngt_f32_e32 vcc, 1.0
 211 ; GFX10: v_cmpx_ngt_f32_e32 1.0
 212 ; GCN-NOT: s_and
     ; The kill condition is an olt compare against 1.0 negated with xor 1. The
     ; checks expect the negation to show up in the compare opcode itself
     ; (v_cmpx_ngt_f32_e32 with 1.0 as the first listed source operand), with no
     ; s_and following it.
 213 define amdgpu_gs void @neg_olt(float %a) {
 214   %below.one = fcmp olt float %a, 1.0
 215   %not.below.one = xor i1 %below.one, 1
 216   call void @llvm.amdgcn.kill(i1 %not.below.one)
 217   ret void
 218 }
 219
 220 ; GCN-LABEL: {{^}}fcmp_x2:
 221 ; FIXME: LLVM should be able to combine these fcmp opcodes.
 222 ; SI: v_cmp_lt_f32_e32 vcc, s{{[0-9]+}}, v0
 223 ; GFX10: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
 224 ; GCN: v_cndmask_b32
 225 ; GCN: v_cmpx_le_f32
     ; Two chained compares: %a is compared against 0.25, the result selects -1.0
     ; or 0.0, and the selected value is compared against 0.0 to form the kill
     ; condition. The checks currently expect both compares to survive (v_cmp_lt
     ; into vcc, a v_cndmask, then v_cmpx_le). On the runs using the SI prefix the
     ; first compare's constant operand is expected in an SGPR; the GFX10 run
     ; expects the literal 0x3e800000.
 226 define amdgpu_ps void @fcmp_x2(float %a) #0 {
 227   %above.quarter = fcmp nsz ogt float %a, 2.500000e-01
 228   %sel = select i1 %above.quarter, float -1.000000e+00, float 0.000000e+00
 229   %keep = fcmp nsz oge float %sel, 0.000000e+00
 230   call void @llvm.amdgcn.kill(i1 %keep) #1
 231   ret void
 232 }
 233
 234 ; GCN-LABEL: {{^}}wqm:
 235 ; GCN: v_cmp_neq_f32_e32 vcc, 0
 236 ; GCN: s_wqm_b64 s[0:1], vcc
 237 ; GCN: s_and_b64 exec, exec, s[0:1]
     ; The kill condition is the result of llvm.amdgcn.wqm.vote applied to a 'une'
     ; compare against 0.0. The checks expect the compare to write vcc, s_wqm_b64
     ; to turn vcc into s[0:1], and that result to be applied to exec with
     ; s_and_b64.
 238 define amdgpu_ps void @wqm(float %a) {
 239   %nonzero = fcmp une float %a, 0.0
 240   %vote = call i1 @llvm.amdgcn.wqm.vote(i1 %nonzero)
 241   call void @llvm.amdgcn.kill(i1 %vote)
 242   ret void
 243 }
 244
 245 ; This checks that we use the 64-bit encoding when the operand is a SGPR.
 246 ; GCN-LABEL: {{^}}test_sgpr:
 247 ; GCN: v_cmpx_ge_f32_e64
     ; %a is passed 'inreg' and compared (ole) against 1.0; the check expects the
     ; e64 form of v_cmpx_ge_f32.
 248 define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
 249   %le.one = fcmp ole float %a, 1.000000e+00
 250   call void @llvm.amdgcn.kill(i1 %le.one) #1
 251   ret void
 252 }
 253
 254 ; GCN-LABEL: {{^}}test_non_inline_imm_sgpr:
 255 ; GCN-NOT: v_cmpx_ge_f32_e64
     ; Same shape as test_sgpr, but the 'inreg' argument is compared against 1.5
     ; (going by the test name, a constant that is not an inline immediate). The
     ; only check is negative: the e64 form of v_cmpx_ge_f32 must not appear.
 256 define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
 257   %le.one.and.half = fcmp ole float %a, 1.500000e+00
 258   call void @llvm.amdgcn.kill(i1 %le.one.and.half) #1
 259   ret void
 260 }
 261
 262 ; GCN-LABEL: {{^}}test_scc_liveness:
 263 ; GCN: v_cmp
 264 ; GCN: s_and_b64 exec
 265 ; GCN: s_cmp
 266 ; GCN: s_cbranch_scc
     ; A kill inside a counting loop whose condition (counter > 0) also decides
     ; the loop exit branch. The checks expect, in this order, a v_cmp, an
     ; s_and_b64 on exec, an s_cmp and an s_cbranch_scc. Going by the test name,
     ; the point is presumably that SCC stays valid for the branch after the exec
     ; update.
 267 define amdgpu_ps void @test_scc_liveness() #0 {
 268 main_body:
 269   br label %loop3
 270
 271 loop3:                                            ; preds = %loop3, %main_body
 272   %iv = phi i32 [ 0, %main_body ], [ %iv.next, %loop3 ]
 273   %positive = icmp sgt i32 %iv, 0
 274   call void @llvm.amdgcn.kill(i1 %positive) #1
 275   %iv.next = add i32 %iv, 1
 276   br i1 %positive, label %endloop15, label %loop3
 277
 278 endloop15:                                        ; preds = %loop3
 279   ret void
 280 }
 281
 282 declare void @llvm.amdgcn.kill(i1) #0
 283 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 284 declare i1 @llvm.amdgcn.wqm.vote(i1)
 285
     ; Attribute group shared by the kill/exp declarations and by the functions
     ; defined with #0 above.
     ; NOTE(review): several call sites above (in fcmp_x2, test_sgpr,
     ; test_non_inline_imm_sgpr and test_scc_liveness) reference attribute group
     ; #1, but only #0 is defined in the visible part of this file -- confirm
     ; whether a definition of #1 is intended.
 286 attributes #0 = { nounwind }