test/CodeGen/AMDGPU/setcc-fneg-constant.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
   2
   3 ; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.
   4
   5 ; GCN-LABEL: {{^}}multi_use_fneg_src:
     ; The value being negated (%mul) has a second use, the float store. The
     ; checks below expect the compare to read the un-negated product against
     ; the constant -4.0 and the store to write that same product register.
   6 ; GCN: buffer_load_dword [[A:v[0-9]+]]
   7 ; GCN: buffer_load_dword [[B:v[0-9]+]]
   8 ; GCN: buffer_load_dword [[C:v[0-9]+]]
   9
  10 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
  11 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
  12 ; GCN: buffer_store_dword [[MUL]]
  13 define amdgpu_kernel void @multi_use_fneg_src() #0 {
  14   %a = load volatile float, float addrspace(1)* undef
  15   %b = load volatile float, float addrspace(1)* undef
  16   %x = load volatile i32, i32 addrspace(1)* undef
  17   %y = load volatile i32, i32 addrspace(1)* undef
  18
  19   %mul = fmul float %a, %b
  20   %neg.mul = fsub float -0.0, %mul ; fneg of %mul, written as -0.0 - %mul
  21   %cmp = fcmp oeq float %neg.mul, 4.0 ; only use of the negated value
  22   %select = select i1 %cmp, i32 %x, i32 %y
  23   store volatile i32 %select, i32 addrspace(1)* undef
  24   store volatile float %mul, float addrspace(1)* undef ; second use of the fneg source
  25   ret void
  26 }
  27
  28 ; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
     ; The value being negated (%mul) is used both by the fneg and directly by a
     ; second fmul. The checks expect the compare to read the un-negated product
     ; against -4.0, and the second multiply to take the product twice with a
     ; negate modifier on one source.
     ; The compare check names the product register (MUL) rather than the first
     ; loaded operand (A), because the IR compares the negated product and not %a.
  29 ; GCN: buffer_load_dword [[A:v[0-9]+]]
  30 ; GCN: buffer_load_dword [[B:v[0-9]+]]
  31 ; GCN: buffer_load_dword [[C:v[0-9]+]]
  32
  33 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
  34 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
  35 ; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
  36 define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
  37   %a = load volatile float, float addrspace(1)* undef
  38   %b = load volatile float, float addrspace(1)* undef
  39   %x = load volatile i32, i32 addrspace(1)* undef
  40   %y = load volatile i32, i32 addrspace(1)* undef
  41
  42   %mul = fmul float %a, %b
  43   %neg.mul = fsub float -0.0, %mul ; fneg of %mul, written as -0.0 - %mul
  44   %use1 = fmul float %mul, %neg.mul ; uses both the product and its negation
  45   %cmp = fcmp oeq float %neg.mul, 4.0
  46   %select = select i1 %cmp, i32 %x, i32 %y
  47
  48   store volatile i32 %select, i32 addrspace(1)* undef
  49   store volatile float %use1, float addrspace(1)* undef
  50   ret void
  51 }
  52
  53 ; GCN-LABEL: {{^}}multi_use_fneg:
     ; The negated value itself (%neg.mul) has two uses, the compare and the
     ; float store. The checks expect the negation to appear as a source modifier
     ; on the multiply, the compare on the very next line to use the original
     ; constant 4.0, no xor instruction, and the store to write the multiply
     ; result.
  54 ; GCN: buffer_load_dword [[A:v[0-9]+]]
  55 ; GCN: buffer_load_dword [[B:v[0-9]+]]
  56 ; GCN: buffer_load_dword [[C:v[0-9]+]]
  57
  58 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
  59 ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
  60 ; GCN-NOT: xor
  61 ; GCN: buffer_store_dword [[MUL]]
  62 define amdgpu_kernel void @multi_use_fneg() #0 {
  63   %a = load volatile float, float addrspace(1)* undef
  64   %b = load volatile float, float addrspace(1)* undef
  65   %x = load volatile i32, i32 addrspace(1)* undef
  66   %y = load volatile i32, i32 addrspace(1)* undef
  67
  68   %mul = fmul float %a, %b
  69   %neg.mul = fsub float -0.0, %mul ; fneg of %mul, written as -0.0 - %mul
  70   %cmp = fcmp oeq float %neg.mul, 4.0 ; first use of the negated value
  71   %select = select i1 %cmp, i32 %x, i32 %y
  72   store volatile i32 %select, i32 addrspace(1)* undef
  73   store volatile float %neg.mul, float addrspace(1)* undef ; second use of the negated value
  74   ret void
  75 }
  76
  77 ; GCN-LABEL: {{^}}multi_foldable_use_fneg:
     ; The negated value (%neg.mul) is used by the compare and by a second fmul.
     ; The checks expect a plain multiply, a compare of that product against
     ; -4.0, a second multiply that negates the product through a source
     ; modifier, and a store of the second multiply's result.
  78 ; GCN: buffer_load_dword [[A:v[0-9]+]]
  79 ; GCN: buffer_load_dword [[B:v[0-9]+]]
  80
  81 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
  82 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
  83 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
  84 ; GCN: buffer_store_dword [[MUL1]]
  85 define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
  86   %a = load volatile float, float addrspace(1)* undef
  87   %b = load volatile float, float addrspace(1)* undef
  88   %x = load volatile i32, i32 addrspace(1)* undef
  89   %y = load volatile i32, i32 addrspace(1)* undef
  90   %z = load volatile i32, i32 addrspace(1)* undef ; NOTE(review) %z is never read below; looks like a leftover, confirm before removing
  91
  92   %mul = fmul float %a, %b
  93   %neg.mul = fsub float -0.0, %mul ; fneg of %mul, written as -0.0 - %mul
  94   %cmp = fcmp oeq float %neg.mul, 4.0 ; first use of the negated value
  95   %select = select i1 %cmp, i32 %x, i32 %y
  96   %use1 = fmul float %neg.mul, %mul ; second use of the negated value
  97   store volatile i32 %select, i32 addrspace(1)* undef
  98   store volatile float %use1, float addrspace(1)* undef
  99   ret void
 100 }
 101
 102 ; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
     ; fcmp oeq of a single-use fneg against 4.0. The check expects one
     ; v_cmp_eq_f32 whose first source operand is the negated constant -4.0.
 103 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
 104 define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
 105   %a = load volatile float, float addrspace(1)* undef
 106   %x = load volatile i32, i32 addrspace(1)* undef
 107   %y = load volatile i32, i32 addrspace(1)* undef
 108   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 109   %cmp = fcmp oeq float %neg.a, 4.0 ; -a == 4.0 holds exactly when a == -4.0
 110   %select = select i1 %cmp, i32 %x, i32 %y
 111   store volatile i32 %select, i32 addrspace(1)* undef
 112   ret void
 113 }
 114
 115 ; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
     ; fcmp ogt of a single-use fneg against 4.0. The check expects one
     ; v_cmp_gt_f32 with -4.0 as the first source operand and a VGPR as the second.
 116 ; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
 117 define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
 118   %a = load volatile float, float addrspace(1)* undef
 119   %x = load volatile i32, i32 addrspace(1)* undef
 120   %y = load volatile i32, i32 addrspace(1)* undef
 121   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 122   %cmp = fcmp ogt float %neg.a, 4.0 ; -a > 4.0 holds exactly when -4.0 > a
 123   %select = select i1 %cmp, i32 %x, i32 %y
 124   store volatile i32 %select, i32 addrspace(1)* undef
 125   ret void
 126 }
 127
 128 ; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
     ; fcmp oge of a single-use fneg against 4.0. The check expects one
     ; v_cmp_ge_f32 with -4.0 as the first source operand and a VGPR as the second.
 129 ; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
 130 define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
 131   %a = load volatile float, float addrspace(1)* undef
 132   %x = load volatile i32, i32 addrspace(1)* undef
 133   %y = load volatile i32, i32 addrspace(1)* undef
 134   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 135   %cmp = fcmp oge float %neg.a, 4.0 ; -a >= 4.0 holds exactly when -4.0 >= a
 136   %select = select i1 %cmp, i32 %x, i32 %y
 137   store volatile i32 %select, i32 addrspace(1)* undef
 138   ret void
 139 }
 140
 141 ; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
     ; fcmp olt of a single-use fneg against 4.0. The check expects one
     ; v_cmp_lt_f32 with -4.0 as the first source operand and a VGPR as the second.
 142 ; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
 143 define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
 144   %a = load volatile float, float addrspace(1)* undef
 145   %x = load volatile i32, i32 addrspace(1)* undef
 146   %y = load volatile i32, i32 addrspace(1)* undef
 147   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 148   %cmp = fcmp olt float %neg.a, 4.0 ; -a < 4.0 holds exactly when -4.0 < a
 149   %select = select i1 %cmp, i32 %x, i32 %y
 150   store volatile i32 %select, i32 addrspace(1)* undef
 151   ret void
 152 }
 153
 154 ; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
     ; fcmp ole of a single-use fneg against 4.0. The check expects one
     ; v_cmp_le_f32 with -4.0 as the first source operand and a VGPR as the second.
 155 ; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
 156 define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
 157   %a = load volatile float, float addrspace(1)* undef
 158   %x = load volatile i32, i32 addrspace(1)* undef
 159   %y = load volatile i32, i32 addrspace(1)* undef
 160   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 161   %cmp = fcmp ole float %neg.a, 4.0 ; -a <= 4.0 holds exactly when -4.0 <= a
 162   %select = select i1 %cmp, i32 %x, i32 %y
 163   store volatile i32 %select, i32 addrspace(1)* undef
 164   ret void
 165 }
 166
 167 ; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
     ; fcmp one of a single-use fneg against 4.0. The check expects one
     ; v_cmp_lg_f32 with -4.0 as the first source operand and a VGPR as the second.
 168 ; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
 169 define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
 170   %a = load volatile float, float addrspace(1)* undef
 171   %x = load volatile i32, i32 addrspace(1)* undef
 172   %y = load volatile i32, i32 addrspace(1)* undef
 173   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 174   %cmp = fcmp one float %neg.a, 4.0 ; ordered and -a != 4.0
 175   %select = select i1 %cmp, i32 %x, i32 %y
 176   store volatile i32 %select, i32 addrspace(1)* undef
 177   ret void
 178 }
 179
 180 ; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
     ; fcmp ueq of a single-use fneg against 4.0. The check expects one
     ; v_cmp_nlg_f32 with -4.0 as the first source operand and a VGPR as the second.
 181 ; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
 182 define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
 183   %a = load volatile float, float addrspace(1)* undef
 184   %x = load volatile i32, i32 addrspace(1)* undef
 185   %y = load volatile i32, i32 addrspace(1)* undef
 186   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 187   %cmp = fcmp ueq float %neg.a, 4.0 ; unordered or -a == 4.0
 188   %select = select i1 %cmp, i32 %x, i32 %y
 189   store volatile i32 %select, i32 addrspace(1)* undef
 190   ret void
 191 }
 192
 193 ; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
     ; fcmp ugt of a single-use fneg against 4.0. The check expects one
     ; v_cmp_nle_f32 with -4.0 as the first source operand and a VGPR as the second.
 194 ; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
 195 define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
 196   %a = load volatile float, float addrspace(1)* undef
 197   %x = load volatile i32, i32 addrspace(1)* undef
 198   %y = load volatile i32, i32 addrspace(1)* undef
 199   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 200   %cmp = fcmp ugt float %neg.a, 4.0 ; unordered or -a > 4.0
 201   %select = select i1 %cmp, i32 %x, i32 %y
 202   store volatile i32 %select, i32 addrspace(1)* undef
 203   ret void
 204 }
 205
 206 ; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
     ; fcmp uge of a single-use fneg against 4.0. The check expects one
     ; v_cmp_nlt_f32 with -4.0 as the first source operand and a VGPR as the second.
 207 ; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
 208 define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
 209   %a = load volatile float, float addrspace(1)* undef
 210   %x = load volatile i32, i32 addrspace(1)* undef
 211   %y = load volatile i32, i32 addrspace(1)* undef
 212   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 213   %cmp = fcmp uge float %neg.a, 4.0 ; unordered or -a >= 4.0
 214   %select = select i1 %cmp, i32 %x, i32 %y
 215   store volatile i32 %select, i32 addrspace(1)* undef
 216   ret void
 217 }
 218
 219 ; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
     ; fcmp ult of a single-use fneg against 4.0. The check expects one
     ; v_cmp_nge_f32 with -4.0 as the first source operand and a VGPR as the second.
 220 ; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
 221 define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
 222   %a = load volatile float, float addrspace(1)* undef
 223   %x = load volatile i32, i32 addrspace(1)* undef
 224   %y = load volatile i32, i32 addrspace(1)* undef
 225   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 226   %cmp = fcmp ult float %neg.a, 4.0 ; unordered or -a < 4.0
 227   %select = select i1 %cmp, i32 %x, i32 %y
 228   store volatile i32 %select, i32 addrspace(1)* undef
 229   ret void
 230 }
 231
 232 ; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
     ; fcmp ule of a single-use fneg against 4.0. The check expects one
     ; v_cmp_ngt_f32 with -4.0 as the first source operand and a VGPR as the second.
 233 ; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
 234 define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
 235   %a = load volatile float, float addrspace(1)* undef
 236   %x = load volatile i32, i32 addrspace(1)* undef
 237   %y = load volatile i32, i32 addrspace(1)* undef
 238   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 239   %cmp = fcmp ule float %neg.a, 4.0 ; unordered or -a <= 4.0
 240   %select = select i1 %cmp, i32 %x, i32 %y
 241   store volatile i32 %select, i32 addrspace(1)* undef
 242   ret void
 243 }
 244
 245 ; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
     ; fcmp une of a single-use fneg against 4.0. The check expects one
     ; v_cmp_neq_f32 with -4.0 as the first source operand and a VGPR as the second.
 246 ; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
 247 define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
 248   %a = load volatile float, float addrspace(1)* undef
 249   %x = load volatile i32, i32 addrspace(1)* undef
 250   %y = load volatile i32, i32 addrspace(1)* undef
 251   %neg.a = fsub float -0.0, %a ; fneg of %a, written as -0.0 - %a
 252   %cmp = fcmp une float %neg.a, 4.0 ; unordered or -a != 4.0
 253   %select = select i1 %cmp, i32 %x, i32 %y
 254   store volatile i32 %select, i32 addrspace(1)* undef
 255   ret void
 256 }
 257
 258 attributes #0 = { nounwind } ; attribute group referenced as #0 by the kernel definitions above