1 ; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
5 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
7 ; --------------------------------------------------------------------------------
9 ; --------------------------------------------------------------------------------
11 ; GCN-LABEL: {{^}}v_fneg_add_f32:
12 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
15 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
16 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
18 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
19 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
20 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
21 %tid = call i32 @llvm.amdgcn.workitem.id.x()
22 %tid.ext = sext i32 %tid to i64
23 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
24 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
25 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
26 %a = load volatile float, float addrspace(1)* %a.gep
27 %b = load volatile float, float addrspace(1)* %b.gep
28 %add = fadd float %a, %b
29 %fneg = fsub float -0.000000e+00, %add
30 store float %fneg, float addrspace(1)* %out.gep
34 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
35 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
36 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
37 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
38 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
40 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
41 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
42 %tid = call i32 @llvm.amdgcn.workitem.id.x()
43 %tid.ext = sext i32 %tid to i64
44 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
45 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
46 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
47 %a = load volatile float, float addrspace(1)* %a.gep
48 %b = load volatile float, float addrspace(1)* %b.gep
49 %add = fadd float %a, %b
50 %fneg = fsub float -0.000000e+00, %add
51 store volatile float %fneg, float addrspace(1)* %out
52 store volatile float %add, float addrspace(1)* %out
56 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
57 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
58 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
60 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
61 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
62 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
64 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
65 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
67 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
68 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
69 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
70 %tid = call i32 @llvm.amdgcn.workitem.id.x()
71 %tid.ext = sext i32 %tid to i64
72 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
73 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
74 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
75 %a = load volatile float, float addrspace(1)* %a.gep
76 %b = load volatile float, float addrspace(1)* %b.gep
77 %add = fadd float %a, %b
78 %fneg = fsub float -0.000000e+00, %add
79 %use1 = fmul float %add, 4.0
80 store volatile float %fneg, float addrspace(1)* %out
81 store volatile float %use1, float addrspace(1)* %out
85 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
86 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
87 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
89 ; GCN-SAFE: v_sub_f32_e32
90 ; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
92 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
94 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
95 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
96 %tid = call i32 @llvm.amdgcn.workitem.id.x()
97 %tid.ext = sext i32 %tid to i64
98 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
99 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
100 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
101 %a = load volatile float, float addrspace(1)* %a.gep
102 %b = load volatile float, float addrspace(1)* %b.gep
103 %fneg.a = fsub float -0.000000e+00, %a
104 %add = fadd float %fneg.a, %b
105 %fneg = fsub float -0.000000e+00, %add
106 store volatile float %fneg, float addrspace(1)* %out
110 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
111 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
112 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
114 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
115 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
117 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
118 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
119 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
120 %tid = call i32 @llvm.amdgcn.workitem.id.x()
121 %tid.ext = sext i32 %tid to i64
122 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
123 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
124 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
125 %a = load volatile float, float addrspace(1)* %a.gep
126 %b = load volatile float, float addrspace(1)* %b.gep
127 %fneg.b = fsub float -0.000000e+00, %b
128 %add = fadd float %a, %fneg.b
129 %fneg = fsub float -0.000000e+00, %add
130 store volatile float %fneg, float addrspace(1)* %out
134 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
136 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
138 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
139 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
141 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
142 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
143 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145 %tid.ext = sext i32 %tid to i64
146 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
147 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
148 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
149 %a = load volatile float, float addrspace(1)* %a.gep
150 %b = load volatile float, float addrspace(1)* %b.gep
151 %fneg.a = fsub float -0.000000e+00, %a
152 %fneg.b = fsub float -0.000000e+00, %b
153 %add = fadd float %fneg.a, %fneg.b
154 %fneg = fsub float -0.000000e+00, %add
155 store volatile float %fneg, float addrspace(1)* %out
159 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
160 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
161 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
165 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
166 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
168 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
169 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
170 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
171 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
172 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
174 %tid.ext = sext i32 %tid to i64
175 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
176 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
177 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
178 %a = load volatile float, float addrspace(1)* %a.gep
179 %b = load volatile float, float addrspace(1)* %b.gep
180 %fneg.a = fsub float -0.000000e+00, %a
181 %add = fadd float %fneg.a, %b
182 %fneg = fsub float -0.000000e+00, %add
183 store volatile float %fneg, float addrspace(1)* %out
184 store volatile float %fneg.a, float addrspace(1)* %out
188 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
192 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
193 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
194 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
196 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
197 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
198 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
199 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
200 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
202 %tid.ext = sext i32 %tid to i64
203 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
204 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
205 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
206 %a = load volatile float, float addrspace(1)* %a.gep
207 %b = load volatile float, float addrspace(1)* %b.gep
208 %fneg.a = fsub float -0.000000e+00, %a
209 %add = fadd float %fneg.a, %b
210 %fneg = fsub float -0.000000e+00, %add
211 %use1 = fmul float %fneg.a, %c
212 store volatile float %fneg, float addrspace(1)* %out
213 store volatile float %use1, float addrspace(1)* %out
217 ; --------------------------------------------------------------------------------
219 ; --------------------------------------------------------------------------------
221 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
222 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
223 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
224 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
225 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
226 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
227 %tid = call i32 @llvm.amdgcn.workitem.id.x()
228 %tid.ext = sext i32 %tid to i64
229 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
230 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
231 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
232 %a = load volatile float, float addrspace(1)* %a.gep
233 %b = load volatile float, float addrspace(1)* %b.gep
234 %mul = fmul float %a, %b
235 %fneg = fsub float -0.000000e+00, %mul
236 store float %fneg, float addrspace(1)* %out.gep
240 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
241 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
242 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
243 ; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
244 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
245 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
246 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
247 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
248 %tid = call i32 @llvm.amdgcn.workitem.id.x()
249 %tid.ext = sext i32 %tid to i64
250 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
251 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
252 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
253 %a = load volatile float, float addrspace(1)* %a.gep
254 %b = load volatile float, float addrspace(1)* %b.gep
255 %mul = fmul float %a, %b
256 %fneg = fsub float -0.000000e+00, %mul
257 store volatile float %fneg, float addrspace(1)* %out
258 store volatile float %mul, float addrspace(1)* %out
262 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
263 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
264 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
265 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
266 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
268 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
269 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
270 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
271 %tid = call i32 @llvm.amdgcn.workitem.id.x()
272 %tid.ext = sext i32 %tid to i64
273 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
274 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
275 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
276 %a = load volatile float, float addrspace(1)* %a.gep
277 %b = load volatile float, float addrspace(1)* %b.gep
278 %mul = fmul float %a, %b
279 %fneg = fsub float -0.000000e+00, %mul
280 %use1 = fmul float %mul, 4.0
281 store volatile float %fneg, float addrspace(1)* %out
282 store volatile float %use1, float addrspace(1)* %out
286 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
287 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
288 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
289 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
290 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
291 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
292 %tid = call i32 @llvm.amdgcn.workitem.id.x()
293 %tid.ext = sext i32 %tid to i64
294 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
295 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
296 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
297 %a = load volatile float, float addrspace(1)* %a.gep
298 %b = load volatile float, float addrspace(1)* %b.gep
299 %fneg.a = fsub float -0.000000e+00, %a
300 %mul = fmul float %fneg.a, %b
301 %fneg = fsub float -0.000000e+00, %mul
302 store volatile float %fneg, float addrspace(1)* %out
306 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
307 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
308 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
309 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
310 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
311 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
312 %tid = call i32 @llvm.amdgcn.workitem.id.x()
313 %tid.ext = sext i32 %tid to i64
314 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
315 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
316 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
317 %a = load volatile float, float addrspace(1)* %a.gep
318 %b = load volatile float, float addrspace(1)* %b.gep
319 %fneg.b = fsub float -0.000000e+00, %b
320 %mul = fmul float %a, %fneg.b
321 %fneg = fsub float -0.000000e+00, %mul
322 store volatile float %fneg, float addrspace(1)* %out
326 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
327 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
328 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
329 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
330 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
331 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
332 %tid = call i32 @llvm.amdgcn.workitem.id.x()
333 %tid.ext = sext i32 %tid to i64
334 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
335 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
336 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
337 %a = load volatile float, float addrspace(1)* %a.gep
338 %b = load volatile float, float addrspace(1)* %b.gep
339 %fneg.a = fsub float -0.000000e+00, %a
340 %fneg.b = fsub float -0.000000e+00, %b
341 %mul = fmul float %fneg.a, %fneg.b
342 %fneg = fsub float -0.000000e+00, %mul
343 store volatile float %fneg, float addrspace(1)* %out
347 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
348 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
349 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
350 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
351 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
353 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
354 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
355 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
356 %tid = call i32 @llvm.amdgcn.workitem.id.x()
357 %tid.ext = sext i32 %tid to i64
358 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
359 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
360 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
361 %a = load volatile float, float addrspace(1)* %a.gep
362 %b = load volatile float, float addrspace(1)* %b.gep
363 %fneg.a = fsub float -0.000000e+00, %a
364 %mul = fmul float %fneg.a, %b
365 %fneg = fsub float -0.000000e+00, %mul
366 store volatile float %fneg, float addrspace(1)* %out
367 store volatile float %fneg.a, float addrspace(1)* %out
371 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
372 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
373 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
374 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
375 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
376 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
377 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
378 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
379 %tid = call i32 @llvm.amdgcn.workitem.id.x()
380 %tid.ext = sext i32 %tid to i64
381 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
382 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
383 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
384 %a = load volatile float, float addrspace(1)* %a.gep
385 %b = load volatile float, float addrspace(1)* %b.gep
386 %fneg.a = fsub float -0.000000e+00, %a
387 %mul = fmul float %fneg.a, %b
388 %fneg = fsub float -0.000000e+00, %mul
389 %use1 = fmul float %fneg.a, %c
390 store volatile float %fneg, float addrspace(1)* %out
391 store volatile float %use1, float addrspace(1)* %out
395 ; --------------------------------------------------------------------------------
397 ; --------------------------------------------------------------------------------
399 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
400 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
401 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
402 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
403 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
404 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
405 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
406 define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
407 %tid = call i32 @llvm.amdgcn.workitem.id.x()
408 %tid.ext = sext i32 %tid to i64
409 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
410 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
411 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
412 %a = load volatile float, float addrspace(1)* %a.gep
413 %b = load volatile float, float addrspace(1)* %b.gep
414 %min = call float @llvm.minnum.f32(float %a, float %b)
415 %fneg = fsub float -0.000000e+00, %min
416 store float %fneg, float addrspace(1)* %out.gep
420 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
423 ; GCN: v_max_f32_e64 v0, -v0, -v1
425 define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
426 %min = call float @llvm.minnum.f32(float %a, float %b)
427 %fneg = fsub float -0.000000e+00, %min
431 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
432 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
433 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
434 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
435 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
436 define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
437 %tid = call i32 @llvm.amdgcn.workitem.id.x()
438 %tid.ext = sext i32 %tid to i64
439 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
440 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
441 %a = load volatile float, float addrspace(1)* %a.gep
442 %min = call float @llvm.minnum.f32(float %a, float %a)
443 %min.fneg = fsub float -0.0, %min
444 store float %min.fneg, float addrspace(1)* %out.gep
448 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
450 ; GCN: v_max_f32_e64 v0, -v0, -v0
452 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
453 %min = call float @llvm.minnum.f32(float %a, float %a)
454 %min.fneg = fsub float -0.0, %min
458 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
459 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
460 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
461 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
462 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
463 define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
464 %tid = call i32 @llvm.amdgcn.workitem.id.x()
465 %tid.ext = sext i32 %tid to i64
466 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
467 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
468 %a = load volatile float, float addrspace(1)* %a.gep
469 %min = call float @llvm.minnum.f32(float 4.0, float %a)
470 %fneg = fsub float -0.000000e+00, %min
471 store float %fneg, float addrspace(1)* %out.gep
475 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
477 ; GCN: v_max_f32_e64 v0, -v0, -4.0
479 define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
480 %min = call float @llvm.minnum.f32(float 4.0, float %a)
481 %fneg = fsub float -0.000000e+00, %min
485 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
486 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
487 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
488 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
489 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
490 define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
491 %tid = call i32 @llvm.amdgcn.workitem.id.x()
492 %tid.ext = sext i32 %tid to i64
493 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
494 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
495 %a = load volatile float, float addrspace(1)* %a.gep
496 %min = call float @llvm.minnum.f32(float -4.0, float %a)
497 %fneg = fsub float -0.000000e+00, %min
498 store float %fneg, float addrspace(1)* %out.gep
502 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
504 ; GCN: v_max_f32_e64 v0, -v0, 4.0
506 define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
507 %min = call float @llvm.minnum.f32(float -4.0, float %a)
508 %fneg = fsub float -0.000000e+00, %min
512 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
513 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
514 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
515 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
516 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
517 %tid = call i32 @llvm.amdgcn.workitem.id.x()
518 %tid.ext = sext i32 %tid to i64
519 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
520 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
521 %a = load volatile float, float addrspace(1)* %a.gep
522 %min = call float @llvm.minnum.f32(float 0.0, float %a)
523 %fneg = fsub float -0.000000e+00, %min
524 store float %fneg, float addrspace(1)* %out.gep
528 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
529 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
530 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
531 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
532 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
533 define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
534 %tid = call i32 @llvm.amdgcn.workitem.id.x()
535 %tid.ext = sext i32 %tid to i64
536 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
537 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
538 %a = load volatile float, float addrspace(1)* %a.gep
539 %min = call float @llvm.minnum.f32(float -0.0, float %a)
540 %fneg = fsub float -0.000000e+00, %min
541 store float %fneg, float addrspace(1)* %out.gep
545 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
546 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
548 ; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
549 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
551 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
552 ; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
553 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
555 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
556 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
557 %tid = call i32 @llvm.amdgcn.workitem.id.x()
558 %tid.ext = sext i32 %tid to i64
559 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
560 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
561 %a = load volatile float, float addrspace(1)* %a.gep
562 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
563 %fneg = fsub float -0.000000e+00, %min
564 store float %fneg, float addrspace(1)* %out.gep
568 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
569 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
571 ; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
572 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
574 ; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
575 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
577 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
578 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
579 %tid = call i32 @llvm.amdgcn.workitem.id.x()
580 %tid.ext = sext i32 %tid to i64
581 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
582 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
583 %a = load volatile float, float addrspace(1)* %a.gep
584 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
585 %fneg = fsub float -0.000000e+00, %min
586 store float %fneg, float addrspace(1)* %out.gep
590 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
591 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
593 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
594 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
595 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
597 ; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
598 ; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
599 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
601 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
602 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
603 %tid = call i32 @llvm.amdgcn.workitem.id.x()
604 %tid.ext = sext i32 %tid to i64
605 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
606 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
607 %a = load volatile half, half addrspace(1)* %a.gep
608 %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
609 %fneg = fsub half -0.000000e+00, %min
610 store half %fneg, half addrspace(1)* %out.gep
614 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
615 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
617 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
618 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
619 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
621 ; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
622 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
624 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
625 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
626 %tid = call i32 @llvm.amdgcn.workitem.id.x()
627 %tid.ext = sext i32 %tid to i64
628 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
629 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
630 %a = load volatile half, half addrspace(1)* %a.gep
631 %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
632 %fneg = fsub half -0.000000e+00, %min
633 store half %fneg, half addrspace(1)* %out.gep
637 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
638 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
640 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
641 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
642 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
643 ; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
645 ; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
646 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
648 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
649 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
650 %tid = call i32 @llvm.amdgcn.workitem.id.x()
651 %tid.ext = sext i32 %tid to i64
652 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
653 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
654 %a = load volatile double, double addrspace(1)* %a.gep
655 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
656 %fneg = fsub double -0.000000e+00, %min
657 store double %fneg, double addrspace(1)* %out.gep
661 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
662 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
664 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
665 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
666 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
667 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
669 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
670 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
672 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
673 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
674 %tid = call i32 @llvm.amdgcn.workitem.id.x()
675 %tid.ext = sext i32 %tid to i64
676 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
677 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
678 %a = load volatile double, double addrspace(1)* %a.gep
679 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
680 %fneg = fsub double -0.000000e+00, %min
681 store double %fneg, double addrspace(1)* %out.gep
685 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
687 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
689 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
690 %min = call float @llvm.minnum.f32(float -0.0, float %a)
691 %fneg = fsub float -0.000000e+00, %min
695 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
697 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
698 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
699 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
700 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
701 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
702 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
703 %tid = call i32 @llvm.amdgcn.workitem.id.x()
704 %tid.ext = sext i32 %tid to i64
705 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
706 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
707 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
708 %a = load volatile float, float addrspace(1)* %a.gep
709 %b = load volatile float, float addrspace(1)* %b.gep
710 %min = call float @llvm.minnum.f32(float 0.0, float %a)
711 %fneg = fsub float -0.000000e+00, %min
712 %mul = fmul float %fneg, %b
713 store float %mul, float addrspace(1)* %out.gep
717 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
718 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
719 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
721 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
723 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
724 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
726 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
727 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
728 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
730 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
731 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
732 %tid = call i32 @llvm.amdgcn.workitem.id.x()
733 %tid.ext = sext i32 %tid to i64
734 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
735 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
736 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
737 %a = load volatile float, float addrspace(1)* %a.gep
738 %b = load volatile float, float addrspace(1)* %b.gep
739 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
740 %fneg = fsub float -0.000000e+00, %min
741 %mul = fmul float %fneg, %b
742 store float %mul, float addrspace(1)* %out.gep
746 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
749 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
750 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
752 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
753 %min = call float @llvm.minnum.f32(float 0.0, float %a)
754 %fneg = fsub float -0.000000e+00, %min
755 %mul = fmul float %fneg, %b
759 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
760 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
761 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
762 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
763 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
764 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
765 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
766 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
767 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
768 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
769 %tid = call i32 @llvm.amdgcn.workitem.id.x()
770 %tid.ext = sext i32 %tid to i64
771 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
772 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
773 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
774 %a = load volatile float, float addrspace(1)* %a.gep
775 %b = load volatile float, float addrspace(1)* %b.gep
776 %min = call float @llvm.minnum.f32(float %a, float %b)
777 %fneg = fsub float -0.000000e+00, %min
778 %use1 = fmul float %min, 4.0
779 store volatile float %fneg, float addrspace(1)* %out
780 store volatile float %use1, float addrspace(1)* %out
784 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
787 ; GCN: v_max_f32_e64 v0, -v0, -v1
788 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
790 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
791 %min = call float @llvm.minnum.f32(float %a, float %b)
792 %fneg = fsub float -0.000000e+00, %min
793 %use1 = fmul float %min, 4.0
794 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
795 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
796 ret <2 x float> %ins1
799 ; --------------------------------------------------------------------------------
801 ; --------------------------------------------------------------------------------
804 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
805 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
806 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
807 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
808 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
809 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
810 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
811 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
812 %tid = call i32 @llvm.amdgcn.workitem.id.x()
813 %tid.ext = sext i32 %tid to i64
814 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
815 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
816 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
817 %a = load volatile float, float addrspace(1)* %a.gep
818 %b = load volatile float, float addrspace(1)* %b.gep
819 %max = call float @llvm.maxnum.f32(float %a, float %b)
820 %fneg = fsub float -0.000000e+00, %max
821 store float %fneg, float addrspace(1)* %out.gep
825 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
828 ; GCN: v_min_f32_e64 v0, -v0, -v1
830 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
831 %max = call float @llvm.maxnum.f32(float %a, float %b)
832 %fneg = fsub float -0.000000e+00, %max
836 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
837 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
838 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
839 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
840 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
841 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
842 %tid = call i32 @llvm.amdgcn.workitem.id.x()
843 %tid.ext = sext i32 %tid to i64
844 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
845 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
846 %a = load volatile float, float addrspace(1)* %a.gep
847 %max = call float @llvm.maxnum.f32(float %a, float %a)
848 %max.fneg = fsub float -0.0, %max
849 store float %max.fneg, float addrspace(1)* %out.gep
853 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
855 ; GCN: v_min_f32_e64 v0, -v0, -v0
857 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
858 %max = call float @llvm.maxnum.f32(float %a, float %a)
859 %max.fneg = fsub float -0.0, %max
863 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
864 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
865 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
866 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
867 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
868 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
869 %tid = call i32 @llvm.amdgcn.workitem.id.x()
870 %tid.ext = sext i32 %tid to i64
871 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
872 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
873 %a = load volatile float, float addrspace(1)* %a.gep
874 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
875 %fneg = fsub float -0.000000e+00, %max
876 store float %fneg, float addrspace(1)* %out.gep
880 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
882 ; GCN: v_min_f32_e64 v0, -v0, -4.0
884 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
885 %max = call float @llvm.maxnum.f32(float 4.0, float %a)
886 %fneg = fsub float -0.000000e+00, %max
890 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
891 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
892 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
893 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
894 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
895 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
896 %tid = call i32 @llvm.amdgcn.workitem.id.x()
897 %tid.ext = sext i32 %tid to i64
898 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
899 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
900 %a = load volatile float, float addrspace(1)* %a.gep
901 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
902 %fneg = fsub float -0.000000e+00, %max
903 store float %fneg, float addrspace(1)* %out.gep
907 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
909 ; GCN: v_min_f32_e64 v0, -v0, 4.0
911 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
912 %max = call float @llvm.maxnum.f32(float -4.0, float %a)
913 %fneg = fsub float -0.000000e+00, %max
917 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
918 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
919 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
920 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
921 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
922 %tid = call i32 @llvm.amdgcn.workitem.id.x()
923 %tid.ext = sext i32 %tid to i64
924 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
925 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
926 %a = load volatile float, float addrspace(1)* %a.gep
927 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
928 %fneg = fsub float -0.000000e+00, %max
929 store float %fneg, float addrspace(1)* %out.gep
933 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
934 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
935 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
936 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
937 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
938 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
939 %tid = call i32 @llvm.amdgcn.workitem.id.x()
940 %tid.ext = sext i32 %tid to i64
941 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
942 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
943 %a = load volatile float, float addrspace(1)* %a.gep
944 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
945 %fneg = fsub float -0.000000e+00, %max
946 store float %fneg, float addrspace(1)* %out.gep
950 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
952 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
954 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
955 %max = call float @llvm.maxnum.f32(float -0.0, float %a)
956 %fneg = fsub float -0.000000e+00, %max
960 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
961 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
962 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
963 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
964 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
965 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
966 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
967 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
968 %tid = call i32 @llvm.amdgcn.workitem.id.x()
969 %tid.ext = sext i32 %tid to i64
970 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
971 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
972 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
973 %a = load volatile float, float addrspace(1)* %a.gep
974 %b = load volatile float, float addrspace(1)* %b.gep
975 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
976 %fneg = fsub float -0.000000e+00, %max
977 %mul = fmul float %fneg, %b
978 store float %mul, float addrspace(1)* %out.gep
982 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
985 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
986 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
988 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
989 %max = call float @llvm.maxnum.f32(float 0.0, float %a)
990 %fneg = fsub float -0.000000e+00, %max
991 %mul = fmul float %fneg, %b
995 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
996 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
997 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
998 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
999 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1000 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1001 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1002 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1003 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1004 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1005 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1006 %tid.ext = sext i32 %tid to i64
1007 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1008 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1009 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1010 %a = load volatile float, float addrspace(1)* %a.gep
1011 %b = load volatile float, float addrspace(1)* %b.gep
1012 %max = call float @llvm.maxnum.f32(float %a, float %b)
1013 %fneg = fsub float -0.000000e+00, %max
1014 %use1 = fmul float %max, 4.0
1015 store volatile float %fneg, float addrspace(1)* %out
1016 store volatile float %use1, float addrspace(1)* %out
1020 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1023 ; GCN: v_min_f32_e64 v0, -v0, -v1
1024 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1025 ; GCN-NEXT: ; return
1026 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1027 %max = call float @llvm.maxnum.f32(float %a, float %b)
1028 %fneg = fsub float -0.000000e+00, %max
1029 %use1 = fmul float %max, 4.0
1030 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1031 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1032 ret <2 x float> %ins1
1035 ; --------------------------------------------------------------------------------
1037 ; --------------------------------------------------------------------------------
1039 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1040 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1041 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1042 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1044 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1045 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1047 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1048 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1049 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1050 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1051 %tid.ext = sext i32 %tid to i64
1052 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1053 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1054 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1055 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1056 %a = load volatile float, float addrspace(1)* %a.gep
1057 %b = load volatile float, float addrspace(1)* %b.gep
1058 %c = load volatile float, float addrspace(1)* %c.gep
1059 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1060 %fneg = fsub float -0.000000e+00, %fma
1061 store float %fneg, float addrspace(1)* %out.gep
1065 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1066 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1067 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1068 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1069 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1070 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1071 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1072 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1073 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1074 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1075 %tid.ext = sext i32 %tid to i64
1076 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1077 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1078 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1079 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1080 %a = load volatile float, float addrspace(1)* %a.gep
1081 %b = load volatile float, float addrspace(1)* %b.gep
1082 %c = load volatile float, float addrspace(1)* %c.gep
1083 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1084 %fneg = fsub float -0.000000e+00, %fma
1085 store volatile float %fneg, float addrspace(1)* %out
1086 store volatile float %fma, float addrspace(1)* %out
1090 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1091 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1092 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1093 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1095 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1096 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1097 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1099 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1100 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1102 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1103 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1104 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1105 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1106 %tid.ext = sext i32 %tid to i64
1107 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1108 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1109 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1110 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1111 %a = load volatile float, float addrspace(1)* %a.gep
1112 %b = load volatile float, float addrspace(1)* %b.gep
1113 %c = load volatile float, float addrspace(1)* %c.gep
1114 %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1115 %fneg = fsub float -0.000000e+00, %fma
1116 %use1 = fmul float %fma, 4.0
1117 store volatile float %fneg, float addrspace(1)* %out
1118 store volatile float %use1, float addrspace(1)* %out
1122 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1123 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1124 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1125 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1127 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1128 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1130 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1131 ; GCN-NSZ-NOT: [[FMA]]
1132 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1133 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1134 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1135 %tid.ext = sext i32 %tid to i64
1136 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1137 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1138 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1139 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1140 %a = load volatile float, float addrspace(1)* %a.gep
1141 %b = load volatile float, float addrspace(1)* %b.gep
1142 %c = load volatile float, float addrspace(1)* %c.gep
1143 %fneg.a = fsub float -0.000000e+00, %a
1144 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1145 %fneg = fsub float -0.000000e+00, %fma
1146 store volatile float %fneg, float addrspace(1)* %out
1150 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1151 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1152 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1153 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1155 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1156 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1158 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1159 ; GCN-NSZ-NOT: [[FMA]]
1160 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1161 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1162 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1163 %tid.ext = sext i32 %tid to i64
1164 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1165 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1166 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1167 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1168 %a = load volatile float, float addrspace(1)* %a.gep
1169 %b = load volatile float, float addrspace(1)* %b.gep
1170 %c = load volatile float, float addrspace(1)* %c.gep
1171 %fneg.b = fsub float -0.000000e+00, %b
1172 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1173 %fneg = fsub float -0.000000e+00, %fma
1174 store volatile float %fneg, float addrspace(1)* %out
1178 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1179 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1180 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1181 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1183 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
1184 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1186 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1187 ; GCN-NSZ-NOT: [[FMA]]
1188 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1189 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1190 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1191 %tid.ext = sext i32 %tid to i64
1192 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1193 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1194 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1195 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1196 %a = load volatile float, float addrspace(1)* %a.gep
1197 %b = load volatile float, float addrspace(1)* %b.gep
1198 %c = load volatile float, float addrspace(1)* %c.gep
1199 %fneg.a = fsub float -0.000000e+00, %a
1200 %fneg.b = fsub float -0.000000e+00, %b
1201 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1202 %fneg = fsub float -0.000000e+00, %fma
1203 store volatile float %fneg, float addrspace(1)* %out
1207 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1208 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1209 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1210 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1212 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1213 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1215 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1216 ; GCN-NSZ-NOT: [[FMA]]
1217 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1218 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1219 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1220 %tid.ext = sext i32 %tid to i64
1221 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1222 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1223 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1224 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1225 %a = load volatile float, float addrspace(1)* %a.gep
1226 %b = load volatile float, float addrspace(1)* %b.gep
1227 %c = load volatile float, float addrspace(1)* %c.gep
1228 %fneg.a = fsub float -0.000000e+00, %a
1229 %fneg.c = fsub float -0.000000e+00, %c
1230 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1231 %fneg = fsub float -0.000000e+00, %fma
1232 store volatile float %fneg, float addrspace(1)* %out
1236 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1237 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1238 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1239 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1241 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1242 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1244 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1245 ; GCN-NSZ-NOT: [[FMA]]
1246 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1247 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1248 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1249 %tid.ext = sext i32 %tid to i64
1250 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1251 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1252 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1253 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1254 %a = load volatile float, float addrspace(1)* %a.gep
1255 %b = load volatile float, float addrspace(1)* %b.gep
1256 %c = load volatile float, float addrspace(1)* %c.gep
1257 %fneg.c = fsub float -0.000000e+00, %c
1258 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1259 %fneg = fsub float -0.000000e+00, %fma
1260 store volatile float %fneg, float addrspace(1)* %out
1264 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1265 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1266 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1267 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1269 ; GCN-SAFE: v_xor_b32
1270 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1271 ; GCN-SAFE: v_xor_b32
1273 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1274 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1276 ; GCN-NSZ-NOT: [[FMA]]
1277 ; GCN-NSZ-NOT: [[NEG_A]]
1278 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1279 ; GCN-NSZ-NOT: [[NEG_A]]
1280 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1281 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1282 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1283 %tid.ext = sext i32 %tid to i64
1284 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1285 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1286 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1287 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1288 %a = load volatile float, float addrspace(1)* %a.gep
1289 %b = load volatile float, float addrspace(1)* %b.gep
1290 %c = load volatile float, float addrspace(1)* %c.gep
1291 %fneg.a = fsub float -0.000000e+00, %a
1292 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1293 %fneg = fsub float -0.000000e+00, %fma
1294 store volatile float %fneg, float addrspace(1)* %out
1295 store volatile float %fneg.a, float addrspace(1)* %out
; Like the store-use case above, but the extra use of fneg(a) is itself
; foldable (an fmul), so -a never needs to be materialized: the fmul takes
; the source modifier directly.
1299 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1300 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1301 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1302 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1304 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1305 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
1306 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1308 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1309 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1310 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1311 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
1312 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1313 %tid.ext = sext i32 %tid to i64
1314 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1315 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1316 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1317 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1318 %a = load volatile float, float addrspace(1)* %a.gep
1319 %b = load volatile float, float addrspace(1)* %b.gep
1320 %c = load volatile float, float addrspace(1)* %c.gep
; Both uses of %fneg.a (fma and fmul) can absorb the negation as a modifier.
1321 %fneg.a = fsub float -0.000000e+00, %a
1322 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1323 %fneg = fsub float -0.000000e+00, %fma
1324 %use1 = fmul float %fneg.a, %d
1325 store volatile float %fneg, float addrspace(1)* %out
1326 store volatile float %use1, float addrspace(1)* %out
1330 ; --------------------------------------------------------------------------------
1331 ; fmuladd tests
1332 ; --------------------------------------------------------------------------------
; fneg(fmuladd(a, b, c)): safe mode computes the mac then negates with an
; xor; nsz folds the fneg into the mad as v_mad_f32 a, -b, -c.
1334 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1335 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1336 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1337 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1339 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1340 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1342 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1343 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1344 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1345 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1346 %tid.ext = sext i32 %tid to i64
1347 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1348 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1349 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1350 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1351 %a = load volatile float, float addrspace(1)* %a.gep
1352 %b = load volatile float, float addrspace(1)* %b.gep
1353 %c = load volatile float, float addrspace(1)* %c.gep
1354 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1355 %fneg = fsub float -0.000000e+00, %fma
1356 store float %fneg, float addrspace(1)* %out.gep
; fneg(fmuladd) where the un-negated fmuladd result is also multiplied by
; 4.0: under nsz the mad is emitted negated and the second use compensates
; by multiplying by -4.0 instead.
1360 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1361 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1362 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1363 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1365 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1366 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1367 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1369 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]]
1370 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1372 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1373 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1374 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1375 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1376 %tid.ext = sext i32 %tid to i64
1377 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1378 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1379 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1380 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1381 %a = load volatile float, float addrspace(1)* %a.gep
1382 %b = load volatile float, float addrspace(1)* %b.gep
1383 %c = load volatile float, float addrspace(1)* %c.gep
; %fma has two users: the fneg below and an fmul by 4.0.
1384 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1385 %fneg = fsub float -0.000000e+00, %fma
1386 %use1 = fmul float %fma, 4.0
1387 store volatile float %fneg, float addrspace(1)* %out
1388 store volatile float %use1, float addrspace(1)* %out
1392 ; --------------------------------------------------------------------------------
1393 ; fp_extend tests
1394 ; --------------------------------------------------------------------------------
; fneg(fpext f32 -> f64) folds into the convert's source modifier:
; v_cvt_f64_f32_e64 with -[[A]]. Exact operation, so no nsz split.
1396 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1397 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1398 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1399 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1400 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1401 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1402 %tid.ext = sext i32 %tid to i64
1403 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1404 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1405 %a = load volatile float, float addrspace(1)* %a.gep
1406 %fpext = fpext float %a to double
1407 %fneg = fsub double -0.000000e+00, %fpext
1408 store double %fneg, double addrspace(1)* %out.gep
; fneg(fpext(fneg(a))): the two negations cancel, leaving a plain
; v_cvt_f64_f32 with no source modifier.
1412 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1413 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1414 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1415 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1416 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1417 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1418 %tid.ext = sext i32 %tid to i64
1419 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1420 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1421 %a = load volatile float, float addrspace(1)* %a.gep
1422 %fneg.a = fsub float -0.000000e+00, %a
1423 %fpext = fpext float %fneg.a to double
1424 %fneg = fsub double -0.000000e+00, %fpext
1425 store double %fneg, double addrspace(1)* %out.gep
; Double-negated fpext where the inner fneg(a) is also stored: the convert
; uses plain [[A]] (negations cancelled) while an xor materializes -a for
; the f32 store.
1429 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1430 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1431 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1432 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1433 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1434 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
1435 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1436 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1437 %tid.ext = sext i32 %tid to i64
1438 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1439 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1440 %a = load volatile float, float addrspace(1)* %a.gep
1441 %fneg.a = fsub float -0.000000e+00, %a
1442 %fpext = fpext float %fneg.a to double
1443 %fneg = fsub double -0.000000e+00, %fpext
1444 store volatile double %fneg, double addrspace(1)* %out.gep
1445 store volatile float %fneg.a, float addrspace(1)* undef
; fneg(fpext(a)) where the un-negated fpext is also stored: one convert is
; emitted, and the negated copy is formed by xoring the sign bit of the
; high dword of the f64 result.
1449 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1450 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1451 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1452 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1453 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1454 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1455 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1456 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1457 %tid.ext = sext i32 %tid to i64
1458 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1459 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1460 %a = load volatile float, float addrspace(1)* %a.gep
1461 %fpext = fpext float %a to double
1462 %fneg = fsub double -0.000000e+00, %fpext
1463 store volatile double %fneg, double addrspace(1)* %out.gep
1464 store volatile double %fpext, double addrspace(1)* undef
; As above, but the second use of the fpext is a foldable f64 multiply by
; 4.0; the negated result still comes from an xor of the high dword.
1468 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1469 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1470 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1471 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1472 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1473 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1474 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1475 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1476 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1477 %tid.ext = sext i32 %tid to i64
1478 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1479 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1480 %a = load volatile float, float addrspace(1)* %a.gep
1481 %fpext = fpext float %a to double
1482 %fneg = fsub double -0.000000e+00, %fpext
1483 %mul = fmul double %fpext, 4.0
1484 store volatile double %fneg, double addrspace(1)* %out.gep
1485 store volatile double %mul, double addrspace(1)* %out.gep
; f16 -> f32 variant with a multi-use fpext. Only the label is checked;
; per the FIXME, source modifiers are not yet folded for f16->f32 extends.
1489 ; FIXME: Source modifiers not folded for f16->f32
1490 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1491 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1492 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1493 %tid.ext = sext i32 %tid to i64
1494 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1495 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1496 %a = load volatile half, half addrspace(1)* %a.gep
1497 %fpext = fpext half %a to float
1498 %fneg = fsub float -0.000000e+00, %fpext
1499 store volatile float %fneg, float addrspace(1)* %out.gep
1500 store volatile float %fpext, float addrspace(1)* %out.gep
; f16 -> f32 extend with a foldable second use (fmul by 4.0); only the
; label is checked (see the FIXME on the preceding f16 test).
1504 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1505 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1506 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1507 %tid.ext = sext i32 %tid to i64
1508 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1509 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1510 %a = load volatile half, half addrspace(1)* %a.gep
1511 %fpext = fpext half %a to float
1512 %fneg = fsub float -0.000000e+00, %fpext
1513 %mul = fmul float %fpext, 4.0
1514 store volatile float %fneg, float addrspace(1)* %out.gep
1515 store volatile float %mul, float addrspace(1)* %out.gep
1519 ; --------------------------------------------------------------------------------
1520 ; fp_round tests
1521 ; --------------------------------------------------------------------------------
; fneg(fptrunc f64 -> f32) folds into the convert's source modifier:
; v_cvt_f32_f64_e64 with -[[A]].
1523 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1524 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1525 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1526 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1527 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1528 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1529 %tid.ext = sext i32 %tid to i64
1530 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1531 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1532 %a = load volatile double, double addrspace(1)* %a.gep
1533 %fpround = fptrunc double %a to float
1534 %fneg = fsub float -0.000000e+00, %fpround
1535 store float %fneg, float addrspace(1)* %out.gep
; fneg(fptrunc(fneg(a))): the negations cancel, leaving a plain
; v_cvt_f32_f64 with no source modifier.
1539 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1540 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1541 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1542 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1543 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1544 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1545 %tid.ext = sext i32 %tid to i64
1546 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1547 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1548 %a = load volatile double, double addrspace(1)* %a.gep
1549 %fneg.a = fsub double -0.000000e+00, %a
1550 %fpround = fptrunc double %fneg.a to float
1551 %fneg = fsub float -0.000000e+00, %fpround
1552 store float %fneg, float addrspace(1)* %out.gep
; Double-negated fptrunc where fneg(a) (f64) is also stored: the convert
; uses the original value, and -a is rebuilt by xoring the sign bit of the
; high half only for the f64 store.
1556 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1557 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1558 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1559 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1560 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1561 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1562 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1563 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1564 %tid.ext = sext i32 %tid to i64
1565 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1566 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1567 %a = load volatile double, double addrspace(1)* %a.gep
1568 %fneg.a = fsub double -0.000000e+00, %a
1569 %fpround = fptrunc double %fneg.a to float
1570 %fneg = fsub float -0.000000e+00, %fpround
1571 store volatile float %fneg, float addrspace(1)* %out.gep
1572 store volatile double %fneg.a, double addrspace(1)* undef
; Double-negated fptrunc where the second use of fneg(a) is a foldable f64
; multiply: the convert is unmodified and the multiply takes -[[A]] as a
; source modifier, so no explicit negation instruction is needed.
1576 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1577 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1578 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1579 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1581 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1582 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1583 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1584 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1585 %tid.ext = sext i32 %tid to i64
1586 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1587 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1588 %a = load volatile double, double addrspace(1)* %a.gep
1589 %fneg.a = fsub double -0.000000e+00, %a
1590 %fpround = fptrunc double %fneg.a to float
1591 %fneg = fsub float -0.000000e+00, %fpround
1592 %use1 = fmul double %fneg.a, %c
1593 store volatile float %fneg, float addrspace(1)* %out.gep
1594 store volatile double %use1, double addrspace(1)* undef
; fneg(fptrunc f32 -> f16) folds into the convert's source modifier:
; v_cvt_f16_f32_e64 with -[[A]].
1598 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1599 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1600 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1601 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1602 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1603 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1604 %tid.ext = sext i32 %tid to i64
1605 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1606 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1607 %a = load volatile float, float addrspace(1)* %a.gep
1608 %fpround = fptrunc float %a to half
1609 %fneg = fsub half -0.000000e+00, %fpround
1610 store half %fneg, half addrspace(1)* %out.gep
; fneg(fptrunc(fneg(a)) f32 -> f16): the negations cancel, leaving a plain
; v_cvt_f16_f32 with no source modifier.
1614 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1615 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1616 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1617 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1618 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1619 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1620 %tid.ext = sext i32 %tid to i64
1621 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1622 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1623 %a = load volatile float, float addrspace(1)* %a.gep
1624 %fneg.a = fsub float -0.000000e+00, %a
1625 %fpround = fptrunc float %fneg.a to half
1626 %fneg = fsub half -0.000000e+00, %fpround
1627 store half %fneg, half addrspace(1)* %out.gep
; fneg(fptrunc(a)) where the un-negated fptrunc result is also stored:
; a single convert is emitted and the negated copy comes from an xor of the
; f32 sign bit.
1631 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1632 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1633 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1634 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1635 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1636 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
1637 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1638 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1639 %tid.ext = sext i32 %tid to i64
1640 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1641 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1642 %a = load volatile double, double addrspace(1)* %a.gep
1643 %fpround = fptrunc double %a to float
1644 %fneg = fsub float -0.000000e+00, %fpround
1645 store volatile float %fneg, float addrspace(1)* %out.gep
1646 store volatile float %fpround, float addrspace(1)* %out.gep
; Double-negated f32->f16 round where fneg(a) is also stored as f32: the
; convert uses plain [[A]] and an xor materializes -a only for the store.
1650 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1651 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1652 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1653 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1654 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1655 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1656 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1657 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1658 %tid.ext = sext i32 %tid to i64
1659 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1660 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1661 %a = load volatile float, float addrspace(1)* %a.gep
1662 %fneg.a = fsub float -0.000000e+00, %a
1663 %fpround = fptrunc float %fneg.a to half
1664 %fneg = fsub half -0.000000e+00, %fpround
1665 store volatile half %fneg, half addrspace(1)* %out.gep
1666 store volatile float %fneg.a, float addrspace(1)* undef
; Double-negated f32->f16 round where the second use of fneg(a) is a
; foldable fmul: the multiply absorbs the negation as -[[A]], so no
; explicit xor is needed.
1670 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1671 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1672 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1673 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1674 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1675 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1676 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1677 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1678 %tid.ext = sext i32 %tid to i64
1679 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1680 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1681 %a = load volatile float, float addrspace(1)* %a.gep
1682 %fneg.a = fsub float -0.000000e+00, %a
1683 %fpround = fptrunc float %fneg.a to half
1684 %fneg = fsub half -0.000000e+00, %fpround
1685 %use1 = fmul float %fneg.a, %c
1686 store volatile half %fneg, half addrspace(1)* %out.gep
1687 store volatile float %use1, float addrspace(1)* undef
1691 ; --------------------------------------------------------------------------------
1692 ; rcp tests
1693 ; --------------------------------------------------------------------------------
; fneg(rcp(a)) folds the negation into the rcp source modifier:
; v_rcp_f32_e64 with -[[A]].
1695 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1697 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1698 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1699 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1700 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1701 %tid.ext = sext i32 %tid to i64
1702 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1703 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1704 %a = load volatile float, float addrspace(1)* %a.gep
1705 %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1706 %fneg = fsub float -0.000000e+00, %rcp
1707 store float %fneg, float addrspace(1)* %out.gep
; fneg(rcp(fneg(a))): the two negations cancel, leaving a plain
; v_rcp_f32 with no source modifier.
1711 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1712 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1713 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1714 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1715 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1716 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1717 %tid.ext = sext i32 %tid to i64
1718 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1719 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1720 %a = load volatile float, float addrspace(1)* %a.gep
1721 %fneg.a = fsub float -0.000000e+00, %a
1722 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1723 %fneg = fsub float -0.000000e+00, %rcp
1724 store float %fneg, float addrspace(1)* %out.gep
; Double-negated rcp where fneg(a) is also stored: the rcp consumes the
; original value and an xor materializes -a only for the store use.
1728 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1729 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1730 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1731 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1732 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1733 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1734 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1735 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1736 %tid.ext = sext i32 %tid to i64
1737 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1738 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1739 %a = load volatile float, float addrspace(1)* %a.gep
1740 %fneg.a = fsub float -0.000000e+00, %a
1741 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1742 %fneg = fsub float -0.000000e+00, %rcp
1743 store volatile float %fneg, float addrspace(1)* %out.gep
1744 store volatile float %fneg.a, float addrspace(1)* undef
; Double-negated rcp where the second use of fneg(a) is a foldable fmul:
; the multiply takes -[[A]] as a source modifier, so no explicit xor.
1748 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1749 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1750 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1751 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1752 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1753 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1754 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1755 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1756 %tid.ext = sext i32 %tid to i64
1757 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1758 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1759 %a = load volatile float, float addrspace(1)* %a.gep
1760 %fneg.a = fsub float -0.000000e+00, %a
1761 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1762 %fneg = fsub float -0.000000e+00, %rcp
1763 %use1 = fmul float %fneg.a, %c
1764 store volatile float %fneg, float addrspace(1)* %out.gep
1765 store volatile float %use1, float addrspace(1)* undef
1769 ; --------------------------------------------------------------------------------
1770 ; fmul_legacy tests
1771 ; --------------------------------------------------------------------------------
; fneg(mul_legacy(a, b)) folds the negation into one multiplicand:
; v_mul_legacy_f32_e64 a, -b. No SAFE/NSZ split is needed here.
1773 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1774 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1775 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1776 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1777 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1778 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1779 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1780 %tid.ext = sext i32 %tid to i64
1781 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1782 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1783 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1784 %a = load volatile float, float addrspace(1)* %a.gep
1785 %b = load volatile float, float addrspace(1)* %b.gep
1786 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1787 %fneg = fsub float -0.000000e+00, %mul
1788 store float %fneg, float addrspace(1)* %out.gep
; fneg(mul_legacy) where the un-negated product is also stored: one
; multiply is emitted and the negated copy is produced by a sign-bit xor.
1792 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1793 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1794 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1795 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1796 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1797 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1798 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1799 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1800 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1801 %tid.ext = sext i32 %tid to i64
1802 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1803 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1804 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1805 %a = load volatile float, float addrspace(1)* %a.gep
1806 %b = load volatile float, float addrspace(1)* %b.gep
1807 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1808 %fneg = fsub float -0.000000e+00, %mul
1809 store volatile float %fneg, float addrspace(1)* %out
1810 store volatile float %mul, float addrspace(1)* %out
; fneg(mul_legacy) where the product also feeds a second mul_legacy by 4.0:
; the first multiply is emitted negated (a * -b) and the second compensates
; with a negated operand (-result * 4.0).
1814 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1815 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1816 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1817 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1818 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1819 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1820 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1821 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1822 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1823 %tid.ext = sext i32 %tid to i64
1824 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1825 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1826 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1827 %a = load volatile float, float addrspace(1)* %a.gep
1828 %b = load volatile float, float addrspace(1)* %b.gep
1829 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1830 %fneg = fsub float -0.000000e+00, %mul
1831 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1832 store volatile float %fneg, float addrspace(1)* %out
1833 store volatile float %use1, float addrspace(1)* %out
; fneg(mul_legacy(fneg(a), b)): the two negations cancel, leaving a plain
; v_mul_legacy_f32 with no modifiers.
1837 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1838 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1839 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1840 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1841 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1842 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1843 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1844 %tid.ext = sext i32 %tid to i64
1845 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1846 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1847 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1848 %a = load volatile float, float addrspace(1)* %a.gep
1849 %b = load volatile float, float addrspace(1)* %b.gep
1850 %fneg.a = fsub float -0.000000e+00, %a
1851 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1852 %fneg = fsub float -0.000000e+00, %mul
1853 store volatile float %fneg, float addrspace(1)* %out
; fneg(mul_legacy(a, fneg(b))): mirror of the previous test with the inner
; negation on the second operand; again the negations cancel.
1857 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1858 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1859 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1860 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1861 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1862 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1863 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1864 %tid.ext = sext i32 %tid to i64
1865 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1866 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1867 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1868 %a = load volatile float, float addrspace(1)* %a.gep
1869 %b = load volatile float, float addrspace(1)* %b.gep
1870 %fneg.b = fsub float -0.000000e+00, %b
1871 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1872 %fneg = fsub float -0.000000e+00, %mul
1873 store volatile float %fneg, float addrspace(1)* %out
; fneg(mul_legacy(fneg(a), fneg(b))): three negations reduce to one,
; emitted as a single source modifier (a * -b).
1877 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1878 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1879 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1880 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1881 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1882 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1883 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1884 %tid.ext = sext i32 %tid to i64
1885 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1886 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1887 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1888 %a = load volatile float, float addrspace(1)* %a.gep
1889 %b = load volatile float, float addrspace(1)* %b.gep
1890 %fneg.a = fsub float -0.000000e+00, %a
1891 %fneg.b = fsub float -0.000000e+00, %b
1892 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1893 %fneg = fsub float -0.000000e+00, %mul
1894 store volatile float %fneg, float addrspace(1)* %out
1898 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1899 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1900 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1901 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1902 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1903 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1904 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; %fneg.a is also stored, so it must be materialized (sign-bit xor); inside
; the multiply its negation cancels with the outer fneg, leaving a plain mul.
1905 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1906 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1907 %tid.ext = sext i32 %tid to i64
1908 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1909 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1910 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1911 %a = load volatile float, float addrspace(1)* %a.gep
1912 %b = load volatile float, float addrspace(1)* %b.gep
1913 %fneg.a = fsub float -0.000000e+00, %a
1914 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1915 %fneg = fsub float -0.000000e+00, %mul
1916 store volatile float %fneg, float addrspace(1)* %out
1917 store volatile float %fneg.a, float addrspace(1)* %out
1921 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1922 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1923 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1924 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1925 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1926 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1927 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; %fneg.a has two fmul.legacy users: in the first the negation cancels with
; the outer fneg (plain VOP2 mul); the second folds it as a -src modifier
; against the scalar kernel argument %c.
1928 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1929 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1930 %tid.ext = sext i32 %tid to i64
1931 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1932 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1933 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1934 %a = load volatile float, float addrspace(1)* %a.gep
1935 %b = load volatile float, float addrspace(1)* %b.gep
1936 %fneg.a = fsub float -0.000000e+00, %a
1937 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1938 %fneg = fsub float -0.000000e+00, %mul
1939 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
1940 store volatile float %fneg, float addrspace(1)* %out
1941 store volatile float %use1, float addrspace(1)* %out
1945 ; --------------------------------------------------------------------------------
; sin tests
1947 ; --------------------------------------------------------------------------------
1949 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
1950 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1951 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
1952 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1953 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1954 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(llvm.sin(a)): the checks expect the negation absorbed into the
; pre-scale multiply constant of the sin lowering (0xbe22f983 — presumably
; -1/(2*pi); confirm against the lowering) rather than a separate xor.
1955 define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1956 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1957 %tid.ext = sext i32 %tid to i64
1958 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1959 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1960 %a = load volatile float, float addrspace(1)* %a.gep
1961 %sin = call float @llvm.sin.f32(float %a)
1962 %fneg = fsub float -0.000000e+00, %sin
1963 store float %fneg, float addrspace(1)* %out.gep
1967 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1968 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1969 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1970 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(llvm.amdgcn.sin(a)): the fneg is folded backwards into the operand of
; v_sin as a VOP3 source modifier (-[[A]]).
1971 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1972 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1973 %tid.ext = sext i32 %tid to i64
1974 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1975 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1976 %a = load volatile float, float addrspace(1)* %a.gep
1977 %sin = call float @llvm.amdgcn.sin.f32(float %a)
1978 %fneg = fsub float -0.0, %sin
1979 store float %fneg, float addrspace(1)* %out.gep
1983 ; --------------------------------------------------------------------------------
; ftrunc tests
1985 ; --------------------------------------------------------------------------------
1987 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
1988 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1989 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1990 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(trunc(a)) == trunc(-a): the fneg folds into v_trunc's source modifier.
1991 define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1992 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1993 %tid.ext = sext i32 %tid to i64
1994 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1995 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1996 %a = load volatile float, float addrspace(1)* %a.gep
1997 %trunc = call float @llvm.trunc.f32(float %a)
1998 %fneg = fsub float -0.0, %trunc
1999 store float %fneg, float addrspace(1)* %out.gep
2003 ; --------------------------------------------------------------------------------
; fround tests
2005 ; --------------------------------------------------------------------------------
2007 ; GCN-LABEL: {{^}}v_fneg_round_f32:
2008 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2009 ; GCN: v_trunc_f32_e32
2010 ; GCN: v_sub_f32_e32
2011 ; GCN: v_cndmask_b32
2013 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
2014 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
2016 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
2017 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(round(a)): round lowers to trunc/sub/cndmask plus a final add. With
; signed zeros preserved (SAFE) the fneg stays a sign-bit xor after the add;
; with -enable-no-signed-zeros-fp-math (NSZ) fneg(x+y) folds to (-x)-y.
2018 define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2019 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2020 %tid.ext = sext i32 %tid to i64
2021 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2022 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2023 %a = load volatile float, float addrspace(1)* %a.gep
2024 %round = call float @llvm.round.f32(float %a)
2025 %fneg = fsub float -0.0, %round
2026 store float %fneg, float addrspace(1)* %out.gep
2030 ; --------------------------------------------------------------------------------
; rint tests
2032 ; --------------------------------------------------------------------------------
2034 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
2035 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2036 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2037 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(rint(a)): rint lowers to v_rndne and the fneg folds into its operand.
2038 define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2039 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2040 %tid.ext = sext i32 %tid to i64
2041 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2042 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2043 %a = load volatile float, float addrspace(1)* %a.gep
2044 %rint = call float @llvm.rint.f32(float %a)
2045 %fneg = fsub float -0.0, %rint
2046 store float %fneg, float addrspace(1)* %out.gep
2050 ; --------------------------------------------------------------------------------
; nearbyint tests
2052 ; --------------------------------------------------------------------------------
2054 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
2055 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2056 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2057 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(nearbyint(a)): same lowering as rint (v_rndne), fneg folded into the
; source modifier.
2058 define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2059 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2060 %tid.ext = sext i32 %tid to i64
2061 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2062 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2063 %a = load volatile float, float addrspace(1)* %a.gep
2064 %nearbyint = call float @llvm.nearbyint.f32(float %a)
2065 %fneg = fsub float -0.0, %nearbyint
2066 store float %fneg, float addrspace(1)* %out.gep
2070 ; --------------------------------------------------------------------------------
2071 ; fcanonicalize tests
2072 ; --------------------------------------------------------------------------------
2074 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
2075 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2076 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
2077 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; fneg(canonicalize(a)): canonicalize lowers to a multiply by 1.0; the fneg
; is absorbed by flipping the constant to -1.0, so one instruction does both.
2078 define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2079 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2080 %tid.ext = sext i32 %tid to i64
2081 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2082 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2083 %a = load volatile float, float addrspace(1)* %a.gep
2084 %trunc = call float @llvm.canonicalize.f32(float %a)
2085 %fneg = fsub float -0.0, %trunc
2086 store float %fneg, float addrspace(1)* %out.gep
2090 ; --------------------------------------------------------------------------------
; interp tests
2092 ; --------------------------------------------------------------------------------
2094 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
2095 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2096 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2097 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2098 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2099 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; fneg fed into v_interp_p1 (which takes no source modifiers): the negation
; is instead folded backwards into the producing multiply (-[[B]]), shared by
; both interp uses.
2100 define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2101 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2102 %tid.ext = sext i32 %tid to i64
2103 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2104 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2105 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2106 %a = load volatile float, float addrspace(1)* %a.gep
2107 %b = load volatile float, float addrspace(1)* %b.gep
2108 %mul = fmul float %a, %b
2109 %fneg = fsub float -0.0, %mul
2110 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2111 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2112 store volatile float %intrp0, float addrspace(1)* %out.gep
2113 store volatile float %intrp1, float addrspace(1)* %out.gep
2117 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
2118 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2119 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2120 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2121 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2122 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; Same as the p1 case, but for the second interpolation stage: the fneg is
; folded into the feeding multiply because v_interp_p2 cannot take modifiers.
2123 define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2124 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2125 %tid.ext = sext i32 %tid to i64
2126 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2127 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2128 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2129 %a = load volatile float, float addrspace(1)* %a.gep
2130 %b = load volatile float, float addrspace(1)* %b.gep
2131 %mul = fmul float %a, %b
2132 %fneg = fsub float -0.0, %mul
2133 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2134 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2135 store volatile float %intrp0, float addrspace(1)* %out.gep
2136 store volatile float %intrp1, float addrspace(1)* %out.gep
2140 ; --------------------------------------------------------------------------------
; CopyToReg tests
2142 ; --------------------------------------------------------------------------------
2144 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2145 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2146 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2147 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2148 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2149 ; GCN: s_cbranch_scc0
2151 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2154 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2155 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2156 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; The fneg value crosses a basic-block boundary (CopyToReg), so it cannot be
; folded into the multiply; a sign-bit xor is emitted in the branch taken.
; NOTE(review): the `if:`/`endif:` label lines referenced by the br below are
; elided from this extract — confirm against the full file.
2158 define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2159 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2160 %tid.ext = sext i32 %tid to i64
2161 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2162 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2163 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2164 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2165 %a = load volatile float, float addrspace(1)* %a.gep
2166 %b = load volatile float, float addrspace(1)* %b.gep
2167 %c = load volatile float, float addrspace(1)* %c.gep
2168 %mul = fmul float %a, %b
2169 %fneg = fsub float -0.0, %mul
2170 %cmp0 = icmp eq i32 %d, 0
2171 br i1 %cmp0, label %if, label %endif
2174 %mul1 = fmul float %fneg, %c
2175 store volatile float %mul1, float addrspace(1)* %out.gep
2179 store volatile float %mul, float addrspace(1)* %out.gep
2183 ; --------------------------------------------------------------------------------
; inline asm tests
2185 ; --------------------------------------------------------------------------------
2187 ; Can't fold into use, so should fold into source
2188 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2191 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2192 ; GCN: ; use [[MUL]]
2193 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; The only users of the fneg are an inline-asm "use" and a store, neither of
; which accepts source modifiers, so the fneg folds into the producing mul.
2194 define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2195 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2196 %tid.ext = sext i32 %tid to i64
2197 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2198 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2199 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2200 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2201 %a = load volatile float, float addrspace(1)* %a.gep
2202 %b = load volatile float, float addrspace(1)* %b.gep
2203 %c = load volatile float, float addrspace(1)* %c.gep
2204 %mul = fmul float %a, %b
2205 %fneg = fsub float -0.0, %mul
2206 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2207 store volatile float %fneg, float addrspace(1)* %out.gep
2211 ; --------------------------------------------------------------------------------
2213 ; --------------------------------------------------------------------------------
2215 ; Can't fold into use, so should fold into source
2216 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2217 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2218 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2219 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2220 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2221 ; GCN: ; use [[NEG]]
2222 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; Unlike the previous test, the un-negated %mul is also stored, so the fneg
; cannot fold into the multiply and is materialized as a sign-bit xor for
; the inline-asm use.
2223 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2224 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2225 %tid.ext = sext i32 %tid to i64
2226 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2227 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2228 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2229 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2230 %a = load volatile float, float addrspace(1)* %a.gep
2231 %b = load volatile float, float addrspace(1)* %b.gep
2232 %c = load volatile float, float addrspace(1)* %c.gep
2233 %mul = fmul float %a, %b
2234 %fneg = fsub float -0.0, %mul
2235 call void asm sideeffect "; use $0", "v"(float %fneg) #0
2236 store volatile float %mul, float addrspace(1)* %out.gep
2240 ; --------------------------------------------------------------------------------
2241 ; code size regression tests
2242 ; --------------------------------------------------------------------------------
2244 ; There are multiple users of the fneg that must use a VOP3
2245 ; instruction, so there is no penalty
2246 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2247 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2248 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2249 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2251 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2252 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2254 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2255 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
; Both users of %fneg.a are fmas (already VOP3), so folding the negation as
; a source modifier into each is free; no standalone fneg is emitted.
2256 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2257 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2258 %tid.ext = sext i32 %tid to i64
2259 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2260 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2261 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2262 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2263 %a = load volatile float, float addrspace(1)* %a.gep
2264 %b = load volatile float, float addrspace(1)* %b.gep
2265 %c = load volatile float, float addrspace(1)* %c.gep
2267 %fneg.a = fsub float -0.0, %a
2268 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2269 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2271 store volatile float %fma0, float addrspace(1)* %out
2272 store volatile float %fma1, float addrspace(1)* %out
2276 ; There are multiple users, but both require using a larger encoding
2279 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2280 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2281 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2282 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2284 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2285 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2286 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2287 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; Both users are VOP2 muls; folding the fneg forces each into the larger
; VOP3 (_e64) encoding, which the fold still prefers over a separate xor.
2288 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2289 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2290 %tid.ext = sext i32 %tid to i64
2291 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2292 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2293 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2294 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2295 %a = load volatile float, float addrspace(1)* %a.gep
2296 %b = load volatile float, float addrspace(1)* %b.gep
2297 %c = load volatile float, float addrspace(1)* %c.gep
2299 %fneg.a = fsub float -0.0, %a
2300 %mul0 = fmul float %fneg.a, %b
2301 %mul1 = fmul float %fneg.a, %c
2303 store volatile float %mul0, float addrspace(1)* %out
2304 store volatile float %mul1, float addrspace(1)* %out
2308 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2309 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2310 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2311 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2312 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2314 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2315 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2317 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2318 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; Mixed users: the fma absorbs the negation for free; the mul pays the VOP3
; encoding cost. The fold is still applied to both.
2319 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2320 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2321 %tid.ext = sext i32 %tid to i64
2322 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2323 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2324 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2325 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2326 %a = load volatile float, float addrspace(1)* %a.gep
2327 %b = load volatile float, float addrspace(1)* %b.gep
2328 %c = load volatile float, float addrspace(1)* %c.gep
2330 %fneg.a = fsub float -0.0, %a
2331 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2332 %mul1 = fmul float %fneg.a, %c
2334 store volatile float %fma0, float addrspace(1)* %out
2335 store volatile float %mul1, float addrspace(1)* %out
2339 ; The use of the fneg requires a code size increase, but folding into
2340 ; the source does not
2342 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2343 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2344 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2345 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2346 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2348 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2349 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2350 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2352 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2353 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2354 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2356 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2357 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
; fneg(fma(a,b,2.0)) with two mul users. SAFE: the negation stays on each
; mul operand (two VOP3 muls). NSZ: fneg(fma(a,b,c)) = fma(a,-b,-c) folds
; into the fma's operands, leaving both muls in the short VOP2 form.
2358 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2359 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2360 %tid.ext = sext i32 %tid to i64
2361 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2362 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2363 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2364 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2365 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2366 %a = load volatile float, float addrspace(1)* %a.gep
2367 %b = load volatile float, float addrspace(1)* %b.gep
2368 %c = load volatile float, float addrspace(1)* %c.gep
2369 %d = load volatile float, float addrspace(1)* %d.gep
2371 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2372 %fneg.fma0 = fsub float -0.0, %fma0
2373 %mul1 = fmul float %fneg.fma0, %c
2374 %mul2 = fmul float %fneg.fma0, %d
2376 store volatile float %mul1, float addrspace(1)* %out
2377 store volatile float %mul2, float addrspace(1)* %out
2381 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2382 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2383 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2384 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2385 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2387 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2388 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2389 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2391 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2392 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; f64 variant of the previous test: v_mul_f64 is always VOP3, so folding the
; fneg into both mul operands is free; the same checks apply to SAFE and NSZ.
2393 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
2394 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2395 %tid.ext = sext i32 %tid to i64
2396 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2397 %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2398 %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2399 %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2400 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2401 %a = load volatile double, double addrspace(1)* %a.gep
2402 %b = load volatile double, double addrspace(1)* %b.gep
2403 %c = load volatile double, double addrspace(1)* %c.gep
2404 %d = load volatile double, double addrspace(1)* %d.gep
2406 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2407 %fneg.fma0 = fsub double -0.0, %fma0
2408 %mul1 = fmul double %fneg.fma0, %c
2409 %mul2 = fmul double %fneg.fma0, %d
2411 store volatile double %mul1, double addrspace(1)* %out
2412 store volatile double %mul2, double addrspace(1)* %out
2416 ; %trunc.a has one fneg use, but it requires a code size increase and
2417 ; the fneg can instead be folded for free into the fma.
2419 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2420 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2421 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2422 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2423 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2424 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2425 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; Folding into v_trunc would force it from VOP2 to VOP3; the fma takes the
; modifier for free, so trunc stays _e32 and the fma gets -[[TRUNC_A]].
2426 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2427 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2428 %tid.ext = sext i32 %tid to i64
2429 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2430 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2431 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2432 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2433 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2434 %a = load volatile float, float addrspace(1)* %a.gep
2435 %b = load volatile float, float addrspace(1)* %b.gep
2436 %c = load volatile float, float addrspace(1)* %c.gep
2437 %d = load volatile float, float addrspace(1)* %d.gep
2439 %trunc.a = call float @llvm.trunc.f32(float %a)
2440 %trunc.fneg.a = fsub float -0.0, %trunc.a
2441 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2442 store volatile float %fma0, float addrspace(1)* %out
2446 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2447 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2448 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2449 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2450 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2451 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2452 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2453 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2454 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2455 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; %trunc.a also has a non-negated mul user, so the fneg cannot be pushed
; into the trunc; it folds only into the fma while the mul uses the plain
; truncated value.
2456 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2457 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2458 %tid.ext = sext i32 %tid to i64
2459 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2460 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2461 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2462 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2463 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2464 %a = load volatile float, float addrspace(1)* %a.gep
2465 %b = load volatile float, float addrspace(1)* %b.gep
2466 %c = load volatile float, float addrspace(1)* %c.gep
2467 %d = load volatile float, float addrspace(1)* %d.gep
2469 %trunc.a = call float @llvm.trunc.f32(float %a)
2470 %trunc.fneg.a = fsub float -0.0, %trunc.a
2471 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2472 %mul1 = fmul float %trunc.a, %d
2473 store volatile float %fma0, float addrspace(1)* %out
2474 store volatile float %mul1, float addrspace(1)* %out
2478 declare i32 @llvm.amdgcn.workitem.id.x() #1
2479 declare float @llvm.fma.f32(float, float, float) #1
2480 declare float @llvm.fmuladd.f32(float, float, float) #1
2481 declare float @llvm.sin.f32(float) #1
2482 declare float @llvm.trunc.f32(float) #1
2483 declare float @llvm.round.f32(float) #1
2484 declare float @llvm.rint.f32(float) #1
2485 declare float @llvm.nearbyint.f32(float) #1
2486 declare float @llvm.canonicalize.f32(float) #1
2487 declare float @llvm.minnum.f32(float, float) #1
2488 declare float @llvm.maxnum.f32(float, float) #1
2489 declare half @llvm.minnum.f16(half, half) #1
2490 declare double @llvm.minnum.f64(double, double) #1
2491 declare double @llvm.fma.f64(double, double, double) #1
2493 declare float @llvm.amdgcn.sin.f32(float) #1
2494 declare float @llvm.amdgcn.rcp.f32(float) #1
2495 declare float @llvm.amdgcn.rcp.legacy(float) #1
2496 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2497 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2498 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2500 attributes #0 = { nounwind }
2501 attributes #1 = { nounwind readnone }