test/CodeGen/AMDGPU/fneg-combines.ll

   1 ; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
   2 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
   3
   4 ; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
   5 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
   6
   7 ; --------------------------------------------------------------------------------
   8 ; fadd tests
   9 ; --------------------------------------------------------------------------------
  10
  11 ; GCN-LABEL: {{^}}v_fneg_add_f32:
  12 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  13 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
  14
  15 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
  16 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
  17
  18 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
  19 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
  20 define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  21   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  22   %tid.ext = sext i32 %tid to i64
  23   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  24   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  25   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  26   %a = load volatile float, float addrspace(1)* %a.gep
  27   %b = load volatile float, float addrspace(1)* %b.gep
  28   %add = fadd float %a, %b
  29   %fneg = fsub float -0.000000e+00, %add
  30   store float %fneg, float addrspace(1)* %out.gep
  31   ret void
  32 }
  33
  34 ; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
  35 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  36 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
  37 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
  38 ; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
  39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
  40 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
  41 define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  42   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  43   %tid.ext = sext i32 %tid to i64
  44   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  45   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  46   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  47   %a = load volatile float, float addrspace(1)* %a.gep
  48   %b = load volatile float, float addrspace(1)* %b.gep
  49   %add = fadd float %a, %b
  50   %fneg = fsub float -0.000000e+00, %add
  51   store volatile float %fneg, float addrspace(1)* %out
  52   store volatile float %add, float addrspace(1)* %out
  53   ret void
  54 }
  55
  56 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
  57 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  58 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
  59
  60 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
  61 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
  62 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
  63
  64 ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
  65 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
  66
  67 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
  68 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
  69 define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  70   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  71   %tid.ext = sext i32 %tid to i64
  72   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  73   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  74   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  75   %a = load volatile float, float addrspace(1)* %a.gep
  76   %b = load volatile float, float addrspace(1)* %b.gep
  77   %add = fadd float %a, %b
  78   %fneg = fsub float -0.000000e+00, %add
  79   %use1 = fmul float %add, 4.0
  80   store volatile float %fneg, float addrspace(1)* %out
  81   store volatile float %use1, float addrspace(1)* %out
  82   ret void
  83 }
  84
  85 ; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
  86 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  87 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
  88
  89 ; GCN-SAFE: v_sub_f32_e32
  90 ; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
  91
  92 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
  93
  94 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
  95 define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  96   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  97   %tid.ext = sext i32 %tid to i64
  98   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  99   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 100   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 101   %a = load volatile float, float addrspace(1)* %a.gep
 102   %b = load volatile float, float addrspace(1)* %b.gep
 103   %fneg.a = fsub float -0.000000e+00, %a
 104   %add = fadd float %fneg.a, %b
 105   %fneg = fsub float -0.000000e+00, %add
 106   store volatile float %fneg, float addrspace(1)* %out
 107   ret void
 108 }
 109
 110 ; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
 111 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 112 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 113
 114 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 115 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
 116
 117 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 118 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 119 define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 120   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 121   %tid.ext = sext i32 %tid to i64
 122   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 123   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 124   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 125   %a = load volatile float, float addrspace(1)* %a.gep
 126   %b = load volatile float, float addrspace(1)* %b.gep
 127   %fneg.b = fsub float -0.000000e+00, %b
 128   %add = fadd float %a, %fneg.b
 129   %fneg = fsub float -0.000000e+00, %add
 130   store volatile float %fneg, float addrspace(1)* %out
 131   ret void
 132 }
 133
 134 ; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
 135 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 136 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 137
 138 ; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
 139 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
 140
 141 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 142 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 143 define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 144   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 145   %tid.ext = sext i32 %tid to i64
 146   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 147   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 148   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 149   %a = load volatile float, float addrspace(1)* %a.gep
 150   %b = load volatile float, float addrspace(1)* %b.gep
 151   %fneg.a = fsub float -0.000000e+00, %a
 152   %fneg.b = fsub float -0.000000e+00, %b
 153   %add = fadd float %fneg.a, %fneg.b
 154   %fneg = fsub float -0.000000e+00, %add
 155   store volatile float %fneg, float addrspace(1)* %out
 156   ret void
 157 }
 158
 159 ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
 160 ; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
 161 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 162 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 163
 164 ; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
 165 ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 166 ; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
 167
 168 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
 169 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
 170 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
 171 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
 172 define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 173   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 174   %tid.ext = sext i32 %tid to i64
 175   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 176   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 177   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 178   %a = load volatile float, float addrspace(1)* %a.gep
 179   %b = load volatile float, float addrspace(1)* %b.gep
 180   %fneg.a = fsub float -0.000000e+00, %a
 181   %add = fadd float %fneg.a, %b
 182   %fneg = fsub float -0.000000e+00, %add
 183   store volatile float %fneg, float addrspace(1)* %out
 184   store volatile float %fneg.a, float addrspace(1)* %out
 185   ret void
 186 }
 187
 188 ; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
 189 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 190 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 191
 192 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 193 ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 194 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
 195
 196 ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
 197 ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 198 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
 199 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 200 define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
 201   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 202   %tid.ext = sext i32 %tid to i64
 203   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 204   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 205   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 206   %a = load volatile float, float addrspace(1)* %a.gep
 207   %b = load volatile float, float addrspace(1)* %b.gep
 208   %fneg.a = fsub float -0.000000e+00, %a
 209   %add = fadd float %fneg.a, %b
 210   %fneg = fsub float -0.000000e+00, %add
 211   %use1 = fmul float %fneg.a, %c
 212   store volatile float %fneg, float addrspace(1)* %out
 213   store volatile float %use1, float addrspace(1)* %out
 214   ret void
 215 }
 216
 217 ; This one asserted with -enable-no-signed-zeros-fp-math
 218 ; GCN-LABEL: {{^}}fneg_fadd_0:
 219 ; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
 220 ; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
 221 ; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
 222 ; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
 223 ; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
 224 ; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
 225 ; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
 226 ; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
 227
 228 define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
 229 .entry:
 230   %tmp7 = fdiv float 1.000000e+00, %tmp6
 231   %tmp8 = fmul float 0.000000e+00, %tmp7
 232   %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
 233   %.i188 = fadd float %tmp9, 0.000000e+00
 234   %tmp10 = fcmp uge float %.i188, %tmp2
 235   %tmp11 = fsub float -0.000000e+00, %.i188
 236   %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
 237   %tmp12 = fcmp ule float %.i092, 0.000000e+00
 238   %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
 239   ret float %.i198
 240 }
 241
 242 ; --------------------------------------------------------------------------------
 243 ; fmul tests
 244 ; --------------------------------------------------------------------------------
 245
 246 ; GCN-LABEL: {{^}}v_fneg_mul_f32:
 247 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 248 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 249 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
 250 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 251 define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 252   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 253   %tid.ext = sext i32 %tid to i64
 254   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 255   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 256   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 257   %a = load volatile float, float addrspace(1)* %a.gep
 258   %b = load volatile float, float addrspace(1)* %b.gep
 259   %mul = fmul float %a, %b
 260   %fneg = fsub float -0.000000e+00, %mul
 261   store float %fneg, float addrspace(1)* %out.gep
 262   ret void
 263 }
 264
 265 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
 266 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 267 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 268 ; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 269 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
 270 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 271 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 272 define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 273   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 274   %tid.ext = sext i32 %tid to i64
 275   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 276   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 277   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 278   %a = load volatile float, float addrspace(1)* %a.gep
 279   %b = load volatile float, float addrspace(1)* %b.gep
 280   %mul = fmul float %a, %b
 281   %fneg = fsub float -0.000000e+00, %mul
 282   store volatile float %fneg, float addrspace(1)* %out
 283   store volatile float %mul, float addrspace(1)* %out
 284   ret void
 285 }
 286
 287 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
 288 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 289 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 290 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
 291 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
 292
 293 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
 294 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 295 define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 296   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 297   %tid.ext = sext i32 %tid to i64
 298   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 299   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 300   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 301   %a = load volatile float, float addrspace(1)* %a.gep
 302   %b = load volatile float, float addrspace(1)* %b.gep
 303   %mul = fmul float %a, %b
 304   %fneg = fsub float -0.000000e+00, %mul
 305   %use1 = fmul float %mul, 4.0
 306   store volatile float %fneg, float addrspace(1)* %out
 307   store volatile float %use1, float addrspace(1)* %out
 308   ret void
 309 }
 310
 311 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
 312 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 313 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 314 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 315 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 316 define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 317   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 318   %tid.ext = sext i32 %tid to i64
 319   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 320   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 321   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 322   %a = load volatile float, float addrspace(1)* %a.gep
 323   %b = load volatile float, float addrspace(1)* %b.gep
 324   %fneg.a = fsub float -0.000000e+00, %a
 325   %mul = fmul float %fneg.a, %b
 326   %fneg = fsub float -0.000000e+00, %mul
 327   store volatile float %fneg, float addrspace(1)* %out
 328   ret void
 329 }
 330
 331 ; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
 332 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 333 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 334 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 335 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 336 define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 337   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 338   %tid.ext = sext i32 %tid to i64
 339   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 340   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 341   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 342   %a = load volatile float, float addrspace(1)* %a.gep
 343   %b = load volatile float, float addrspace(1)* %b.gep
 344   %fneg.b = fsub float -0.000000e+00, %b
 345   %mul = fmul float %a, %fneg.b
 346   %fneg = fsub float -0.000000e+00, %mul
 347   store volatile float %fneg, float addrspace(1)* %out
 348   ret void
 349 }
 350
 351 ; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
 352 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 353 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 354 ; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
 355 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 356 define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 357   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 358   %tid.ext = sext i32 %tid to i64
 359   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 360   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 361   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 362   %a = load volatile float, float addrspace(1)* %a.gep
 363   %b = load volatile float, float addrspace(1)* %b.gep
 364   %fneg.a = fsub float -0.000000e+00, %a
 365   %fneg.b = fsub float -0.000000e+00, %b
 366   %mul = fmul float %fneg.a, %fneg.b
 367   %fneg = fsub float -0.000000e+00, %mul
 368   store volatile float %fneg, float addrspace(1)* %out
 369   ret void
 370 }
 371
 372 ; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
 373 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 374 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 375 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
 376 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
 377
 378 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 379 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
 380 define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 381   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 382   %tid.ext = sext i32 %tid to i64
 383   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 384   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 385   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 386   %a = load volatile float, float addrspace(1)* %a.gep
 387   %b = load volatile float, float addrspace(1)* %b.gep
 388   %fneg.a = fsub float -0.000000e+00, %a
 389   %mul = fmul float %fneg.a, %b
 390   %fneg = fsub float -0.000000e+00, %mul
 391   store volatile float %fneg, float addrspace(1)* %out
 392   store volatile float %fneg.a, float addrspace(1)* %out
 393   ret void
 394 }
 395
 396 ; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
 397 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 398 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 399 ; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
 400 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 401 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 402 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 403 define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
 404   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 405   %tid.ext = sext i32 %tid to i64
 406   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 407   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 408   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 409   %a = load volatile float, float addrspace(1)* %a.gep
 410   %b = load volatile float, float addrspace(1)* %b.gep
 411   %fneg.a = fsub float -0.000000e+00, %a
 412   %mul = fmul float %fneg.a, %b
 413   %fneg = fsub float -0.000000e+00, %mul
 414   %use1 = fmul float %fneg.a, %c
 415   store volatile float %fneg, float addrspace(1)* %out
 416   store volatile float %use1, float addrspace(1)* %out
 417   ret void
 418 }
 419
 420 ; --------------------------------------------------------------------------------
 421 ; fminnum tests
 422 ; --------------------------------------------------------------------------------
 423
 424 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
 425 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 426 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 427 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 428 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
 429 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
 430 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 431 define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 432   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 433   %tid.ext = sext i32 %tid to i64
 434   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 435   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 436   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 437   %a = load volatile float, float addrspace(1)* %a.gep
 438   %b = load volatile float, float addrspace(1)* %b.gep
 439   %min = call float @llvm.minnum.f32(float %a, float %b)
 440   %fneg = fsub float -0.000000e+00, %min
 441   store float %fneg, float addrspace(1)* %out.gep
 442   ret void
 443 }
 444
 445 ; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
 446 ; GCN-NOT: v0
 447 ; GCN-NOT: v1
 448 ; GCN: v_max_f32_e64 v0, -v0, -v1
 449 ; GCN-NEXT: ; return
 450 define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
 451   %min = call float @llvm.minnum.f32(float %a, float %b)
 452   %fneg = fsub float -0.000000e+00, %min
 453   ret float %fneg
 454 }
 455
 456 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
 457 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 458 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 459 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
 460 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 461 define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 462   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 463   %tid.ext = sext i32 %tid to i64
 464   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 465   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 466   %a = load volatile float, float addrspace(1)* %a.gep
 467   %min = call float @llvm.minnum.f32(float %a, float %a)
 468   %min.fneg = fsub float -0.0, %min
 469   store float %min.fneg, float addrspace(1)* %out.gep
 470   ret void
 471 }
 472
 473 ; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
 474 ; GCN-NOT: v0
 475 ; GCN: v_max_f32_e64 v0, -v0, -v0
 476 ; GCN-NEXT: ; return
 477 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
 478   %min = call float @llvm.minnum.f32(float %a, float %a)
 479   %min.fneg = fsub float -0.0, %min
 480   ret float %min.fneg
 481 }
 482
 483 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
 484 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 485 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 486 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
 487 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 488 define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 489   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 490   %tid.ext = sext i32 %tid to i64
 491   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 492   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 493   %a = load volatile float, float addrspace(1)* %a.gep
 494   %min = call float @llvm.minnum.f32(float 4.0, float %a)
 495   %fneg = fsub float -0.000000e+00, %min
 496   store float %fneg, float addrspace(1)* %out.gep
 497   ret void
 498 }
 499
 500 ; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
 501 ; GCN-NOT: v0
 502 ; GCN: v_max_f32_e64 v0, -v0, -4.0
 503 ; GCN-NEXT: ; return
 504 define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
 505   %min = call float @llvm.minnum.f32(float 4.0, float %a)
 506   %fneg = fsub float -0.000000e+00, %min
 507   ret float %fneg
 508 }
 509
 510 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
 511 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 512 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 513 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
 514 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 515 define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 516   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 517   %tid.ext = sext i32 %tid to i64
 518   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 519   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 520   %a = load volatile float, float addrspace(1)* %a.gep
 521   %min = call float @llvm.minnum.f32(float -4.0, float %a)
 522   %fneg = fsub float -0.000000e+00, %min
 523   store float %fneg, float addrspace(1)* %out.gep
 524   ret void
 525 }
 526
 527 ; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
 528 ; GCN-NOT: v0
 529 ; GCN: v_max_f32_e64 v0, -v0, 4.0
 530 ; GCN-NEXT: ; return
 531 define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
 532   %min = call float @llvm.minnum.f32(float -4.0, float %a)
 533   %fneg = fsub float -0.000000e+00, %min
 534   ret float %fneg
 535 }
 536
 537 ; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
 538 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 539 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
 540 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 541 define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 542   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 543   %tid.ext = sext i32 %tid to i64
 544   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 545   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 546   %a = load volatile float, float addrspace(1)* %a.gep
 547   %min = call float @llvm.minnum.f32(float 0.0, float %a)
 548   %fneg = fsub float -0.000000e+00, %min
 549   store float %fneg, float addrspace(1)* %out.gep
 550   ret void
 551 }
 552
 553 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
 554 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 555 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 556 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
 557 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 558 define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 559   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 560   %tid.ext = sext i32 %tid to i64
 561   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 562   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 563   %a = load volatile float, float addrspace(1)* %a.gep
 564   %min = call float @llvm.minnum.f32(float -0.0, float %a)
 565   %fneg = fsub float -0.000000e+00, %min
 566   store float %fneg, float addrspace(1)* %out.gep
 567   ret void
 568 }
 569
 570 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
 571 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 572
 573 ; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
 574 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
 575
 576 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
 577 ; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
 578 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
 579
 580 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 581 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 582   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 583   %tid.ext = sext i32 %tid to i64
 584   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 585   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 586   %a = load volatile float, float addrspace(1)* %a.gep
 587   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
 588   %fneg = fsub float -0.000000e+00, %min
 589   store float %fneg, float addrspace(1)* %out.gep
 590   ret void
 591 }
 592
 593 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
 594 ; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 595
 596 ; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
 597 ; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
 598
 599 ; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
 600 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
 601
 602 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 603 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 604   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 605   %tid.ext = sext i32 %tid to i64
 606   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 607   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 608   %a = load volatile float, float addrspace(1)* %a.gep
 609   %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
 610   %fneg = fsub float -0.000000e+00, %min
 611   store float %fneg, float addrspace(1)* %out.gep
 612   ret void
 613 }
 614
 615 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
 616 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
 617
 618 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
 619 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
 620 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
 621
 622 ; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
 623 ; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
 624 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
 625
 626 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 627 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
 628   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 629   %tid.ext = sext i32 %tid to i64
 630   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
 631   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
 632   %a = load volatile half, half addrspace(1)* %a.gep
 633   %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
 634   %fneg = fsub half -0.000000e+00, %min
 635   store half %fneg, half addrspace(1)* %out.gep
 636   ret void
 637 }
 638
 639 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
 640 ; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
 641
 642 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
 643 ; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
 644 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
 645
 646 ; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
 647 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
 648
 649 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 650 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
 651   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 652   %tid.ext = sext i32 %tid to i64
 653   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
 654   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
 655   %a = load volatile half, half addrspace(1)* %a.gep
 656   %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
 657   %fneg = fsub half -0.000000e+00, %min
 658   store half %fneg, half addrspace(1)* %out.gep
 659   ret void
 660 }
 661
 662 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
 663 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 664
 665 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
 666 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
 667 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
 668 ; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
 669
 670 ; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
 671 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
 672
 673 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 674 define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
 675   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 676   %tid.ext = sext i32 %tid to i64
 677   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
 678   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
 679   %a = load volatile double, double addrspace(1)* %a.gep
 680   %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
 681   %fneg = fsub double -0.000000e+00, %min
 682   store double %fneg, double addrspace(1)* %out.gep
 683   ret void
 684 }
 685
 686 ; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
 687 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 688
 689 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
 690 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
 691 ; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
 692 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
 693
 694 ; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
 695 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
 696
 697 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 698 define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
 699   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 700   %tid.ext = sext i32 %tid to i64
 701   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
 702   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
 703   %a = load volatile double, double addrspace(1)* %a.gep
 704   %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
 705   %fneg = fsub double -0.000000e+00, %min
 706   store double %fneg, double addrspace(1)* %out.gep
 707   ret void
 708 }
 709
 710 ; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
 711 ; GCN-NOT: v0
 712 ; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
 713 ; GCN-NEXT: ; return
 714 define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
 715   %min = call float @llvm.minnum.f32(float -0.0, float %a)
 716   %fneg = fsub float -0.000000e+00, %min
 717   ret float %fneg
 718 }
 719
 720 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
 721 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 722 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 723 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
 724 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
 725 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
 726 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 727 define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 728   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 729   %tid.ext = sext i32 %tid to i64
 730   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 731   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 732   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 733   %a = load volatile float, float addrspace(1)* %a.gep
 734   %b = load volatile float, float addrspace(1)* %b.gep
 735   %min = call float @llvm.minnum.f32(float 0.0, float %a)
 736   %fneg = fsub float -0.000000e+00, %min
 737   %mul = fmul float %fneg, %b
 738   store float %mul, float addrspace(1)* %out.gep
 739   ret void
 740 }
 741
 742 ; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
 743 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 744 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 745
 746 ; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
 747
 748 ; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
 749 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
 750
 751 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
 752 ; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
 753 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
 754
 755 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 756 define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 757   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 758   %tid.ext = sext i32 %tid to i64
 759   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 760   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 761   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 762   %a = load volatile float, float addrspace(1)* %a.gep
 763   %b = load volatile float, float addrspace(1)* %b.gep
 764   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
 765   %fneg = fsub float -0.000000e+00, %min
 766   %mul = fmul float %fneg, %b
 767   store float %mul, float addrspace(1)* %out.gep
 768   ret void
 769 }
 770
 771 ; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
 772 ; GCN-NOT: v0
 773 ; GCN-NOT: v1
 774 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
 775 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
 776 ; GCN-NEXT: ; return
 777 define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
 778   %min = call float @llvm.minnum.f32(float 0.0, float %a)
 779   %fneg = fsub float -0.000000e+00, %min
 780   %mul = fmul float %fneg, %b
 781   ret float %mul
 782 }
 783
 784 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
 785 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 786 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 787 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 788 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
 789 ; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
 790 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
 791 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
 792 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 793 define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 794   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 795   %tid.ext = sext i32 %tid to i64
 796   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 797   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 798   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 799   %a = load volatile float, float addrspace(1)* %a.gep
 800   %b = load volatile float, float addrspace(1)* %b.gep
 801   %min = call float @llvm.minnum.f32(float %a, float %b)
 802   %fneg = fsub float -0.000000e+00, %min
 803   %use1 = fmul float %min, 4.0
 804   store volatile float %fneg, float addrspace(1)* %out
 805   store volatile float %use1, float addrspace(1)* %out
 806   ret void
 807 }
 808
 809 ; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
 810 ; GCN-NOT: v0
 811 ; GCN-NOT: v1
 812 ; GCN: v_max_f32_e64 v0, -v0, -v1
 813 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
 814 ; GCN-NEXT: ; return
 815 define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
 816   %min = call float @llvm.minnum.f32(float %a, float %b)
 817   %fneg = fsub float -0.000000e+00, %min
 818   %use1 = fmul float %min, 4.0
 819   %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
 820   %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
 821   ret <2 x float> %ins1
 822 }
 823
 824 ; --------------------------------------------------------------------------------
 825 ; fmaxnum tests
 826 ; --------------------------------------------------------------------------------
 827
 828
 829 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
 830 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 831 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 832 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 833 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
 834 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
 835 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 836 define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 837   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 838   %tid.ext = sext i32 %tid to i64
 839   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 840   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 841   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 842   %a = load volatile float, float addrspace(1)* %a.gep
 843   %b = load volatile float, float addrspace(1)* %b.gep
 844   %max = call float @llvm.maxnum.f32(float %a, float %b)
 845   %fneg = fsub float -0.000000e+00, %max
 846   store float %fneg, float addrspace(1)* %out.gep
 847   ret void
 848 }
 849
 850 ; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
 851 ; GCN-NOT: v0
 852 ; GCN-NOT: v1
 853 ; GCN: v_min_f32_e64 v0, -v0, -v1
 854 ; GCN-NEXT: ; return
 855 define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
 856   %max = call float @llvm.maxnum.f32(float %a, float %b)
 857   %fneg = fsub float -0.000000e+00, %max
 858   ret float %fneg
 859 }
 860
 861 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
 862 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 863 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 864 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
 865 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 866 define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 867   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 868   %tid.ext = sext i32 %tid to i64
 869   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 870   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 871   %a = load volatile float, float addrspace(1)* %a.gep
 872   %max = call float @llvm.maxnum.f32(float %a, float %a)
 873   %max.fneg = fsub float -0.0, %max
 874   store float %max.fneg, float addrspace(1)* %out.gep
 875   ret void
 876 }
 877
 878 ; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
 879 ; GCN-NOT: v0
 880 ; GCN: v_min_f32_e64 v0, -v0, -v0
 881 ; GCN-NEXT: ; return
 882 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
 883   %max = call float @llvm.maxnum.f32(float %a, float %a)
 884   %max.fneg = fsub float -0.0, %max
 885   ret float %max.fneg
 886 }
 887
 888 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
 889 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 890 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 891 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
 892 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 893 define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 894   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 895   %tid.ext = sext i32 %tid to i64
 896   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 897   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 898   %a = load volatile float, float addrspace(1)* %a.gep
 899   %max = call float @llvm.maxnum.f32(float 4.0, float %a)
 900   %fneg = fsub float -0.000000e+00, %max
 901   store float %fneg, float addrspace(1)* %out.gep
 902   ret void
 903 }
 904
 905 ; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
 906 ; GCN-NOT: v0
 907 ; GCN: v_min_f32_e64 v0, -v0, -4.0
 908 ; GCN-NEXT: ; return
 909 define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
 910   %max = call float @llvm.maxnum.f32(float 4.0, float %a)
 911   %fneg = fsub float -0.000000e+00, %max
 912   ret float %fneg
 913 }
 914
 915 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
 916 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 917 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 918 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
 919 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 920 define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 921   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 922   %tid.ext = sext i32 %tid to i64
 923   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 924   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 925   %a = load volatile float, float addrspace(1)* %a.gep
 926   %max = call float @llvm.maxnum.f32(float -4.0, float %a)
 927   %fneg = fsub float -0.000000e+00, %max
 928   store float %fneg, float addrspace(1)* %out.gep
 929   ret void
 930 }
 931
 932 ; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
 933 ; GCN-NOT: v0
 934 ; GCN: v_min_f32_e64 v0, -v0, 4.0
 935 ; GCN-NEXT: ; return
 936 define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
 937   %max = call float @llvm.maxnum.f32(float -4.0, float %a)
 938   %fneg = fsub float -0.000000e+00, %max
 939   ret float %fneg
 940 }
 941
 942 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
 943 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 944 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
 945 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 946 define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 947   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 948   %tid.ext = sext i32 %tid to i64
 949   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 950   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 951   %a = load volatile float, float addrspace(1)* %a.gep
 952   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
 953   %fneg = fsub float -0.000000e+00, %max
 954   store float %fneg, float addrspace(1)* %out.gep
 955   ret void
 956 }
 957
 958 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
 959 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 960 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 961 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
 962 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 963 define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
 964   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 965   %tid.ext = sext i32 %tid to i64
 966   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 967   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 968   %a = load volatile float, float addrspace(1)* %a.gep
 969   %max = call float @llvm.maxnum.f32(float -0.0, float %a)
 970   %fneg = fsub float -0.000000e+00, %max
 971   store float %fneg, float addrspace(1)* %out.gep
 972   ret void
 973 }
 974
 975 ; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
 976 ; GCN-NOT: v0
 977 ; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
 978 ; GCN-NEXT: ; return
 979 define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
 980   %max = call float @llvm.maxnum.f32(float -0.0, float %a)
 981   %fneg = fsub float -0.000000e+00, %max
 982   ret float %fneg
 983 }
 984
 985 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
 986 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 987 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 988 ; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
 989 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
 990 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
 991 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 992 define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 993   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 994   %tid.ext = sext i32 %tid to i64
 995   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 996   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 997   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
 998   %a = load volatile float, float addrspace(1)* %a.gep
 999   %b = load volatile float, float addrspace(1)* %b.gep
1000   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1001   %fneg = fsub float -0.000000e+00, %max
1002   %mul = fmul float %fneg, %b
1003   store float %mul, float addrspace(1)* %out.gep
1004   ret void
1005 }
1006
1007 ; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1008 ; GCN-NOT: v0
1009 ; GCN-NOT: v1
1010 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
1011 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
1012 ; GCN-NEXT: ; return
1013 define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
1014   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1015   %fneg = fsub float -0.000000e+00, %max
1016   %mul = fmul float %fneg, %b
1017   ret float %mul
1018 }
1019
1020 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1021 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1022 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1023 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
1024 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
1025 ; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
1026 ; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
1027 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
1028 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
1029 define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1030   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1031   %tid.ext = sext i32 %tid to i64
1032   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1033   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1034   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1035   %a = load volatile float, float addrspace(1)* %a.gep
1036   %b = load volatile float, float addrspace(1)* %b.gep
1037   %max = call float @llvm.maxnum.f32(float %a, float %b)
1038   %fneg = fsub float -0.000000e+00, %max
1039   %use1 = fmul float %max, 4.0
1040   store volatile float %fneg, float addrspace(1)* %out
1041   store volatile float %use1, float addrspace(1)* %out
1042   ret void
1043 }
1044
1045 ; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1046 ; GCN-NOT: v0
1047 ; GCN-NOT: v1
1048 ; GCN: v_min_f32_e64 v0, -v0, -v1
1049 ; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
1050 ; GCN-NEXT: ; return
1051 define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
1052   %max = call float @llvm.maxnum.f32(float %a, float %b)
1053   %fneg = fsub float -0.000000e+00, %max
1054   %use1 = fmul float %max, 4.0
1055   %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1056   %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1057   ret <2 x float> %ins1
1058 }
1059
1060 ; --------------------------------------------------------------------------------
1061 ; fma tests
1062 ; --------------------------------------------------------------------------------
1063
1064 ; GCN-LABEL: {{^}}v_fneg_fma_f32:
1065 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1066 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1067 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1068
1069 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
1070 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
1071
1072 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1073 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1074 define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1075   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1076   %tid.ext = sext i32 %tid to i64
1077   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1078   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1079   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1080   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1081   %a = load volatile float, float addrspace(1)* %a.gep
1082   %b = load volatile float, float addrspace(1)* %b.gep
1083   %c = load volatile float, float addrspace(1)* %c.gep
1084   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1085   %fneg = fsub float -0.000000e+00, %fma
1086   store float %fneg, float addrspace(1)* %out.gep
1087   ret void
1088 }
1089
1090 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
1091 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1092 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1093 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1094 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1095 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1096 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1097 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1098 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1099   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1100   %tid.ext = sext i32 %tid to i64
1101   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1102   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1103   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1104   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1105   %a = load volatile float, float addrspace(1)* %a.gep
1106   %b = load volatile float, float addrspace(1)* %b.gep
1107   %c = load volatile float, float addrspace(1)* %c.gep
1108   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1109   %fneg = fsub float -0.000000e+00, %fma
1110   store volatile float %fneg, float addrspace(1)* %out
1111   store volatile float %fma, float addrspace(1)* %out
1112   ret void
1113 }
1114
1115 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
1116 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1117 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1118 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1119
1120 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1121 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
1122 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
1123
1124 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1125 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
1126
1127 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1128 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1129 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1130   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1131   %tid.ext = sext i32 %tid to i64
1132   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1133   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1134   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1135   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1136   %a = load volatile float, float addrspace(1)* %a.gep
1137   %b = load volatile float, float addrspace(1)* %b.gep
1138   %c = load volatile float, float addrspace(1)* %c.gep
1139   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1140   %fneg = fsub float -0.000000e+00, %fma
1141   %use1 = fmul float %fma, 4.0
1142   store volatile float %fneg, float addrspace(1)* %out
1143   store volatile float %use1, float addrspace(1)* %out
1144   ret void
1145 }
1146
1147 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
1148 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1149 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1150 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1151
1152 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
1153 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1154
1155 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1156 ; GCN-NSZ-NOT: [[FMA]]
1157 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1158 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1159   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1160   %tid.ext = sext i32 %tid to i64
1161   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1162   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1163   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1164   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1165   %a = load volatile float, float addrspace(1)* %a.gep
1166   %b = load volatile float, float addrspace(1)* %b.gep
1167   %c = load volatile float, float addrspace(1)* %c.gep
1168   %fneg.a = fsub float -0.000000e+00, %a
1169   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1170   %fneg = fsub float -0.000000e+00, %fma
1171   store volatile float %fneg, float addrspace(1)* %out
1172   ret void
1173 }
1174
1175 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
1176 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1177 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1178 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1179
1180 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1181 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1182
1183 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1184 ; GCN-NSZ-NOT: [[FMA]]
1185 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1186 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1187   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1188   %tid.ext = sext i32 %tid to i64
1189   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1190   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1191   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1192   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1193   %a = load volatile float, float addrspace(1)* %a.gep
1194   %b = load volatile float, float addrspace(1)* %b.gep
1195   %c = load volatile float, float addrspace(1)* %c.gep
1196   %fneg.b = fsub float -0.000000e+00, %b
1197   %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1198   %fneg = fsub float -0.000000e+00, %fma
1199   store volatile float %fneg, float addrspace(1)* %out
1200   ret void
1201 }
1202
1203 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1204 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1205 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1206 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1207
1208 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
1209 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]
1210
1211 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
1212 ; GCN-NSZ-NOT: [[FMA]]
1213 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1214 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1215   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1216   %tid.ext = sext i32 %tid to i64
1217   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1218   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1219   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1220   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1221   %a = load volatile float, float addrspace(1)* %a.gep
1222   %b = load volatile float, float addrspace(1)* %b.gep
1223   %c = load volatile float, float addrspace(1)* %c.gep
1224   %fneg.a = fsub float -0.000000e+00, %a
1225   %fneg.b = fsub float -0.000000e+00, %b
1226   %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1227   %fneg = fsub float -0.000000e+00, %fma
1228   store volatile float %fneg, float addrspace(1)* %out
1229   ret void
1230 }
1231
1232 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
1233 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1234 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1235 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1236
1237 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
1238 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]
1239
1240 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
1241 ; GCN-NSZ-NOT: [[FMA]]
1242 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1243 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1244   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1245   %tid.ext = sext i32 %tid to i64
1246   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1247   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1248   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1249   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1250   %a = load volatile float, float addrspace(1)* %a.gep
1251   %b = load volatile float, float addrspace(1)* %b.gep
1252   %c = load volatile float, float addrspace(1)* %c.gep
1253   %fneg.a = fsub float -0.000000e+00, %a
1254   %fneg.c = fsub float -0.000000e+00, %c
1255   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1256   %fneg = fsub float -0.000000e+00, %fma
1257   store volatile float %fneg, float addrspace(1)* %out
1258   ret void
1259 }
1260
1261 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
1262 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1263 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1264 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1265
1266 ; GCN-NSZ-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1267 ; GCN-NSZ-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1268
1269 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
1270 ; GCN-NSZ-NOT: [[FMA]]
1271 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1272 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1273   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1274   %tid.ext = sext i32 %tid to i64
1275   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1276   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1277   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1278   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1279   %a = load volatile float, float addrspace(1)* %a.gep
1280   %b = load volatile float, float addrspace(1)* %b.gep
1281   %c = load volatile float, float addrspace(1)* %c.gep
1282   %fneg.c = fsub float -0.000000e+00, %c
1283   %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1284   %fneg = fsub float -0.000000e+00, %fma
1285   store volatile float %fneg, float addrspace(1)* %out
1286   ret void
1287 }
1288
1289 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
1290 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1291 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1292 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1293
1294 ; GCN-SAFE: v_xor_b32
1295 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
1296 ; GCN-SAFE: v_xor_b32
1297
1298 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1299 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1300
1301 ; GCN-NSZ-NOT: [[FMA]]
1302 ; GCN-NSZ-NOT: [[NEG_A]]
1303 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
1304 ; GCN-NSZ-NOT: [[NEG_A]]
1305 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1306 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1307   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1308   %tid.ext = sext i32 %tid to i64
1309   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1310   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1311   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1312   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1313   %a = load volatile float, float addrspace(1)* %a.gep
1314   %b = load volatile float, float addrspace(1)* %b.gep
1315   %c = load volatile float, float addrspace(1)* %c.gep
1316   %fneg.a = fsub float -0.000000e+00, %a
1317   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1318   %fneg = fsub float -0.000000e+00, %fma
1319   store volatile float %fneg, float addrspace(1)* %out
1320   store volatile float %fneg.a, float addrspace(1)* %out
1321   ret void
1322 }
1323
1324 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
1325 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1326 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1327 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1328
1329 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1330 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
1331 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
1332
1333 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
1334 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
1335 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1336 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
1337   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1338   %tid.ext = sext i32 %tid to i64
1339   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1340   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1341   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1342   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1343   %a = load volatile float, float addrspace(1)* %a.gep
1344   %b = load volatile float, float addrspace(1)* %b.gep
1345   %c = load volatile float, float addrspace(1)* %c.gep
1346   %fneg.a = fsub float -0.000000e+00, %a
1347   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1348   %fneg = fsub float -0.000000e+00, %fma
1349   %use1 = fmul float %fneg.a, %d
1350   store volatile float %fneg, float addrspace(1)* %out
1351   store volatile float %use1, float addrspace(1)* %out
1352   ret void
1353 }
1354
1355 ; --------------------------------------------------------------------------------
1356 ; fmad tests
1357 ; --------------------------------------------------------------------------------
1358
1359 ; GCN-LABEL: {{^}}v_fneg_fmad_f32:
1360 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1361 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1362 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1363
1364 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1365 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
1366
1367 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
1368 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1369 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1370   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1371   %tid.ext = sext i32 %tid to i64
1372   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1373   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1374   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1375   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1376   %a = load volatile float, float addrspace(1)* %a.gep
1377   %b = load volatile float, float addrspace(1)* %b.gep
1378   %c = load volatile float, float addrspace(1)* %c.gep
1379   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1380   %fneg = fsub float -0.000000e+00, %fma
1381   store float %fneg, float addrspace(1)* %out.gep
1382   ret void
1383 }
1384
1385 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
1386 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1387 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1388 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1389
1390 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
1391 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
1392 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
1393
1394 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
1395 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
1396
1397 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
1398 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1399 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
1400   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1401   %tid.ext = sext i32 %tid to i64
1402   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1403   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1404   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1405   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1406   %a = load volatile float, float addrspace(1)* %a.gep
1407   %b = load volatile float, float addrspace(1)* %b.gep
1408   %c = load volatile float, float addrspace(1)* %c.gep
1409   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1410   %fneg = fsub float -0.000000e+00, %fma
1411   %use1 = fmul float %fma, 4.0
1412   store volatile float %fneg, float addrspace(1)* %out
1413   store volatile float %use1, float addrspace(1)* %out
1414   ret void
1415 }
1416
1417 ; --------------------------------------------------------------------------------
1418 ; fp_extend tests
1419 ; --------------------------------------------------------------------------------
1420
1421 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
1422 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1423 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
1424 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1425 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1426   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1427   %tid.ext = sext i32 %tid to i64
1428   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1429   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1430   %a = load volatile float, float addrspace(1)* %a.gep
1431   %fpext = fpext float %a to double
1432   %fneg = fsub double -0.000000e+00, %fpext
1433   store double %fneg, double addrspace(1)* %out.gep
1434   ret void
1435 }
1436
1437 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
1438 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1439 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1440 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1441 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1442   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1443   %tid.ext = sext i32 %tid to i64
1444   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1445   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1446   %a = load volatile float, float addrspace(1)* %a.gep
1447   %fneg.a = fsub float -0.000000e+00, %a
1448   %fpext = fpext float %fneg.a to double
1449   %fneg = fsub double -0.000000e+00, %fpext
1450   store double %fneg, double addrspace(1)* %out.gep
1451   ret void
1452 }
1453
1454 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1455 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1456 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
1457 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
1458 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1459 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
1460 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1461   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1462   %tid.ext = sext i32 %tid to i64
1463   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1464   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1465   %a = load volatile float, float addrspace(1)* %a.gep
1466   %fneg.a = fsub float -0.000000e+00, %a
1467   %fpext = fpext float %fneg.a to double
1468   %fneg = fsub double -0.000000e+00, %fpext
1469   store volatile double %fneg, double addrspace(1)* %out.gep
1470   store volatile float %fneg.a, float addrspace(1)* undef
1471   ret void
1472 }
1473
1474 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1475 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1476 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1477 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1478 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1479 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
1480 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1481   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1482   %tid.ext = sext i32 %tid to i64
1483   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1484   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1485   %a = load volatile float, float addrspace(1)* %a.gep
1486   %fpext = fpext float %a to double
1487   %fneg = fsub double -0.000000e+00, %fpext
1488   store volatile double %fneg, double addrspace(1)* %out.gep
1489   store volatile double %fpext, double addrspace(1)* undef
1490   ret void
1491 }
1492
1493 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1494 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1495 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
1496 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
1497 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
1498 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
1499 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1500 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1501   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1502   %tid.ext = sext i32 %tid to i64
1503   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1504   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
1505   %a = load volatile float, float addrspace(1)* %a.gep
1506   %fpext = fpext float %a to double
1507   %fneg = fsub double -0.000000e+00, %fpext
1508   %mul = fmul double %fpext, 4.0
1509   store volatile double %fneg, double addrspace(1)* %out.gep
1510   store volatile double %mul, double addrspace(1)* %out.gep
1511   ret void
1512 }
1513
1514 ; FIXME: Source modifiers not folded for f16->f32
1515 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1516 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1517   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1518   %tid.ext = sext i32 %tid to i64
1519   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1520   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1521   %a = load volatile half, half addrspace(1)* %a.gep
1522   %fpext = fpext half %a to float
1523   %fneg = fsub float -0.000000e+00, %fpext
1524   store volatile float %fneg, float addrspace(1)* %out.gep
1525   store volatile float %fpext, float addrspace(1)* %out.gep
1526   ret void
1527 }
1528
1529 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1530 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
1531   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1532   %tid.ext = sext i32 %tid to i64
1533   %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
1534   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1535   %a = load volatile half, half addrspace(1)* %a.gep
1536   %fpext = fpext half %a to float
1537   %fneg = fsub float -0.000000e+00, %fpext
1538   %mul = fmul float %fpext, 4.0
1539   store volatile float %fneg, float addrspace(1)* %out.gep
1540   store volatile float %mul, float addrspace(1)* %out.gep
1541   ret void
1542 }
1543
1544 ; --------------------------------------------------------------------------------
1545 ; fp_round tests
1546 ; --------------------------------------------------------------------------------
1547
1548 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
1549 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1550 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
1551 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1552 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1553   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1554   %tid.ext = sext i32 %tid to i64
1555   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1556   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1557   %a = load volatile double, double addrspace(1)* %a.gep
1558   %fpround = fptrunc double %a to float
1559   %fneg = fsub float -0.000000e+00, %fpround
1560   store float %fneg, float addrspace(1)* %out.gep
1561   ret void
1562 }
1563
1564 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
1565 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1566 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1567 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1568 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1569   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1570   %tid.ext = sext i32 %tid to i64
1571   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1572   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1573   %a = load volatile double, double addrspace(1)* %a.gep
1574   %fneg.a = fsub double -0.000000e+00, %a
1575   %fpround = fptrunc double %fneg.a to float
1576   %fneg = fsub float -0.000000e+00, %fpround
1577   store float %fneg, float addrspace(1)* %out.gep
1578   ret void
1579 }
1580
1581 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
1582 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
1583 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
1584 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
1585 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1586 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
1587 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1588   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1589   %tid.ext = sext i32 %tid to i64
1590   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1591   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1592   %a = load volatile double, double addrspace(1)* %a.gep
1593   %fneg.a = fsub double -0.000000e+00, %a
1594   %fpround = fptrunc double %fneg.a to float
1595   %fneg = fsub float -0.000000e+00, %fpround
1596   store volatile float %fneg, float addrspace(1)* %out.gep
1597   store volatile double %fneg.a, double addrspace(1)* undef
1598   ret void
1599 }
1600
1601 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1602 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1603 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
1604 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
1605
1606 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1607 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1608 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
1609   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1610   %tid.ext = sext i32 %tid to i64
1611   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1612   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1613   %a = load volatile double, double addrspace(1)* %a.gep
1614   %fneg.a = fsub double -0.000000e+00, %a
1615   %fpround = fptrunc double %fneg.a to float
1616   %fneg = fsub float -0.000000e+00, %fpround
1617   %use1 = fmul double %fneg.a, %c
1618   store volatile float %fneg, float addrspace(1)* %out.gep
1619   store volatile double %use1, double addrspace(1)* undef
1620   ret void
1621 }
1622
1623 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
1624 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1625 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1626 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1627 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1628   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1629   %tid.ext = sext i32 %tid to i64
1630   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1631   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1632   %a = load volatile float, float addrspace(1)* %a.gep
1633   %fpround = fptrunc float %a to half
1634   %fneg = fsub half -0.000000e+00, %fpround
1635   store half %fneg, half addrspace(1)* %out.gep
1636   ret void
1637 }
1638
1639 ; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
1640 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1641 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1642 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1643 define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1644   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1645   %tid.ext = sext i32 %tid to i64
1646   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1647   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1648   %a = load volatile float, float addrspace(1)* %a.gep
1649   %fneg.a = fsub float -0.000000e+00, %a
1650   %fpround = fptrunc float %fneg.a to half
1651   %fneg = fsub half -0.000000e+00, %fpround
1652   store half %fneg, half addrspace(1)* %out.gep
1653   ret void
1654 }
1655
1656 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1657 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1658 ; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
1659 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
1660 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
1661 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
1662 define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
1663   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1664   %tid.ext = sext i32 %tid to i64
1665   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
1666   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1667   %a = load volatile double, double addrspace(1)* %a.gep
1668   %fpround = fptrunc double %a to float
1669   %fneg = fsub float -0.000000e+00, %fpround
1670   store volatile float %fneg, float addrspace(1)* %out.gep
1671   store volatile float %fpround, float addrspace(1)* %out.gep
1672   ret void
1673 }
1674
1675 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
1676 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1677 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1678 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1679 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1680 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1681 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1682   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1683   %tid.ext = sext i32 %tid to i64
1684   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1685   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1686   %a = load volatile float, float addrspace(1)* %a.gep
1687   %fneg.a = fsub float -0.000000e+00, %a
1688   %fpround = fptrunc float %fneg.a to half
1689   %fneg = fsub half -0.000000e+00, %fpround
1690   store volatile half %fneg, half addrspace(1)* %out.gep
1691   store volatile float %fneg.a, float addrspace(1)* undef
1692   ret void
1693 }
1694
1695 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1696 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1697 ; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1698 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
1699 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1700 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
1701 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1702   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1703   %tid.ext = sext i32 %tid to i64
1704   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1705   %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
1706   %a = load volatile float, float addrspace(1)* %a.gep
1707   %fneg.a = fsub float -0.000000e+00, %a
1708   %fpround = fptrunc float %fneg.a to half
1709   %fneg = fsub half -0.000000e+00, %fpround
1710   %use1 = fmul float %fneg.a, %c
1711   store volatile half %fneg, half addrspace(1)* %out.gep
1712   store volatile float %use1, float addrspace(1)* undef
1713   ret void
1714 }
1715
1716 ; --------------------------------------------------------------------------------
1717 ; rcp tests
1718 ; --------------------------------------------------------------------------------
1719
1720 ; GCN-LABEL: {{^}}v_fneg_rcp_f32:
1721 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1722 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1723 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1724 define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1725   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1726   %tid.ext = sext i32 %tid to i64
1727   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1728   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1729   %a = load volatile float, float addrspace(1)* %a.gep
1730   %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1731   %fneg = fsub float -0.000000e+00, %rcp
1732   store float %fneg, float addrspace(1)* %out.gep
1733   ret void
1734 }
1735
1736 ; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
1737 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1738 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1739 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1740 define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1741   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1742   %tid.ext = sext i32 %tid to i64
1743   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1744   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1745   %a = load volatile float, float addrspace(1)* %a.gep
1746   %fneg.a = fsub float -0.000000e+00, %a
1747   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1748   %fneg = fsub float -0.000000e+00, %rcp
1749   store float %fneg, float addrspace(1)* %out.gep
1750   ret void
1751 }
1752
1753 ; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
1754 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1755 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1756 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1757 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1758 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1759 define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1760   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1761   %tid.ext = sext i32 %tid to i64
1762   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1763   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1764   %a = load volatile float, float addrspace(1)* %a.gep
1765   %fneg.a = fsub float -0.000000e+00, %a
1766   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1767   %fneg = fsub float -0.000000e+00, %rcp
1768   store volatile float %fneg, float addrspace(1)* %out.gep
1769   store volatile float %fneg.a, float addrspace(1)* undef
1770   ret void
1771 }
1772
1773 ; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1774 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1775 ; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1776 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1777 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1778 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1779 define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1780   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1781   %tid.ext = sext i32 %tid to i64
1782   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1783   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1784   %a = load volatile float, float addrspace(1)* %a.gep
1785   %fneg.a = fsub float -0.000000e+00, %a
1786   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1787   %fneg = fsub float -0.000000e+00, %rcp
1788   %use1 = fmul float %fneg.a, %c
1789   store volatile float %fneg, float addrspace(1)* %out.gep
1790   store volatile float %use1, float addrspace(1)* undef
1791   ret void
1792 }
1793
1794 ; --------------------------------------------------------------------------------
1795 ; fmul_legacy tests
1796 ; --------------------------------------------------------------------------------
1797
1798 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1799 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1800 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1801 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1802 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1803 define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1804   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1805   %tid.ext = sext i32 %tid to i64
1806   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1807   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1808   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1809   %a = load volatile float, float addrspace(1)* %a.gep
1810   %b = load volatile float, float addrspace(1)* %b.gep
1811   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1812   %fneg = fsub float -0.000000e+00, %mul
1813   store float %fneg, float addrspace(1)* %out.gep
1814   ret void
1815 }
1816
1817 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1818 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1819 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1820 ; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1821 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1822 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1823 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1824 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1825   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1826   %tid.ext = sext i32 %tid to i64
1827   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1828   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1829   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1830   %a = load volatile float, float addrspace(1)* %a.gep
1831   %b = load volatile float, float addrspace(1)* %b.gep
1832   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1833   %fneg = fsub float -0.000000e+00, %mul
1834   store volatile float %fneg, float addrspace(1)* %out
1835   store volatile float %mul, float addrspace(1)* %out
1836   ret void
1837 }
1838
1839 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1840 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1841 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1842 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1843 ; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
1844 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1845 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1846 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1847   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1848   %tid.ext = sext i32 %tid to i64
1849   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1850   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1851   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1852   %a = load volatile float, float addrspace(1)* %a.gep
1853   %b = load volatile float, float addrspace(1)* %b.gep
1854   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1855   %fneg = fsub float -0.000000e+00, %mul
1856   %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1857   store volatile float %fneg, float addrspace(1)* %out
1858   store volatile float %use1, float addrspace(1)* %out
1859   ret void
1860 }
1861
1862 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1863 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1864 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1865 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1866 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1867 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1868   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1869   %tid.ext = sext i32 %tid to i64
1870   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1871   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1872   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1873   %a = load volatile float, float addrspace(1)* %a.gep
1874   %b = load volatile float, float addrspace(1)* %b.gep
1875   %fneg.a = fsub float -0.000000e+00, %a
1876   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1877   %fneg = fsub float -0.000000e+00, %mul
1878   store volatile float %fneg, float addrspace(1)* %out
1879   ret void
1880 }
1881
1882 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1883 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1884 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1885 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1886 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1887 define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1888   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1889   %tid.ext = sext i32 %tid to i64
1890   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1891   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1892   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1893   %a = load volatile float, float addrspace(1)* %a.gep
1894   %b = load volatile float, float addrspace(1)* %b.gep
1895   %fneg.b = fsub float -0.000000e+00, %b
1896   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1897   %fneg = fsub float -0.000000e+00, %mul
1898   store volatile float %fneg, float addrspace(1)* %out
1899   ret void
1900 }
1901
1902 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1903 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1904 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1905 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1906 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1907 define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1908   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1909   %tid.ext = sext i32 %tid to i64
1910   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1911   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1912   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1913   %a = load volatile float, float addrspace(1)* %a.gep
1914   %b = load volatile float, float addrspace(1)* %b.gep
1915   %fneg.a = fsub float -0.000000e+00, %a
1916   %fneg.b = fsub float -0.000000e+00, %b
1917   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1918   %fneg = fsub float -0.000000e+00, %mul
1919   store volatile float %fneg, float addrspace(1)* %out
1920   ret void
1921 }
1922
1923 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1924 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1925 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1926 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1927 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1928 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1929 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
1930 define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1931   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1932   %tid.ext = sext i32 %tid to i64
1933   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1934   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1935   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1936   %a = load volatile float, float addrspace(1)* %a.gep
1937   %b = load volatile float, float addrspace(1)* %b.gep
1938   %fneg.a = fsub float -0.000000e+00, %a
1939   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1940   %fneg = fsub float -0.000000e+00, %mul
1941   store volatile float %fneg, float addrspace(1)* %out
1942   store volatile float %fneg.a, float addrspace(1)* %out
1943   ret void
1944 }
1945
1946 ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1947 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1948 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1949 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
1950 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1951 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
1952 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
1953 define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1954   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1955   %tid.ext = sext i32 %tid to i64
1956   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1957   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1958   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1959   %a = load volatile float, float addrspace(1)* %a.gep
1960   %b = load volatile float, float addrspace(1)* %b.gep
1961   %fneg.a = fsub float -0.000000e+00, %a
1962   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1963   %fneg = fsub float -0.000000e+00, %mul
1964   %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
1965   store volatile float %fneg, float addrspace(1)* %out
1966   store volatile float %use1, float addrspace(1)* %out
1967   ret void
1968 }
1969
1970 ; --------------------------------------------------------------------------------
1971 ; sin tests
1972 ; --------------------------------------------------------------------------------
1973
1974 ; GCN-LABEL: {{^}}v_fneg_sin_f32:
1975 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1976 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
1977 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1978 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1979 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1980 define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1981   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1982   %tid.ext = sext i32 %tid to i64
1983   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1984   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1985   %a = load volatile float, float addrspace(1)* %a.gep
1986   %sin = call float @llvm.sin.f32(float %a)
1987   %fneg = fsub float -0.000000e+00, %sin
1988   store float %fneg, float addrspace(1)* %out.gep
1989   ret void
1990 }
1991
1992 ; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1993 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1994 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1995 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1996 define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1997   %tid = call i32 @llvm.amdgcn.workitem.id.x()
1998   %tid.ext = sext i32 %tid to i64
1999   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2000   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2001   %a = load volatile float, float addrspace(1)* %a.gep
2002   %sin = call float @llvm.amdgcn.sin.f32(float %a)
2003   %fneg = fsub float -0.0, %sin
2004   store float %fneg, float addrspace(1)* %out.gep
2005   ret void
2006 }
2007
2008 ; --------------------------------------------------------------------------------
2009 ; ftrunc tests
2010 ; --------------------------------------------------------------------------------
2011
2012 ; GCN-LABEL: {{^}}v_fneg_trunc_f32:
2013 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2014 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2015 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2016 define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2017   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2018   %tid.ext = sext i32 %tid to i64
2019   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2020   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2021   %a = load volatile float, float addrspace(1)* %a.gep
2022   %trunc = call float @llvm.trunc.f32(float %a)
2023   %fneg = fsub float -0.0, %trunc
2024   store float %fneg, float addrspace(1)* %out.gep
2025   ret void
2026 }
2027
2028 ; --------------------------------------------------------------------------------
2029 ; fround tests
2030 ; --------------------------------------------------------------------------------
2031
2032 ; GCN-LABEL: {{^}}v_fneg_round_f32:
2033 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2034 ; GCN: v_trunc_f32_e32
2035 ; GCN: v_sub_f32_e32
2036 ; GCN: v_cndmask_b32
2037
2038 ; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
2039 ; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
2040
2041 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
2042 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2043 define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2044   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2045   %tid.ext = sext i32 %tid to i64
2046   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2047   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2048   %a = load volatile float, float addrspace(1)* %a.gep
2049   %round = call float @llvm.round.f32(float %a)
2050   %fneg = fsub float -0.0, %round
2051   store float %fneg, float addrspace(1)* %out.gep
2052   ret void
2053 }
2054
2055 ; --------------------------------------------------------------------------------
2056 ; rint tests
2057 ; --------------------------------------------------------------------------------
2058
2059 ; GCN-LABEL: {{^}}v_fneg_rint_f32:
2060 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2061 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2062 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2063 define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2064   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2065   %tid.ext = sext i32 %tid to i64
2066   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2067   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2068   %a = load volatile float, float addrspace(1)* %a.gep
2069   %rint = call float @llvm.rint.f32(float %a)
2070   %fneg = fsub float -0.0, %rint
2071   store float %fneg, float addrspace(1)* %out.gep
2072   ret void
2073 }
2074
2075 ; --------------------------------------------------------------------------------
2076 ; nearbyint tests
2077 ; --------------------------------------------------------------------------------
2078
2079 ; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
2080 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2081 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
2082 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2083 define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2084   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2085   %tid.ext = sext i32 %tid to i64
2086   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2087   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2088   %a = load volatile float, float addrspace(1)* %a.gep
2089   %nearbyint = call float @llvm.nearbyint.f32(float %a)
2090   %fneg = fsub float -0.0, %nearbyint
2091   store float %fneg, float addrspace(1)* %out.gep
2092   ret void
2093 }
2094
2095 ; --------------------------------------------------------------------------------
2096 ; fcanonicalize tests
2097 ; --------------------------------------------------------------------------------
2098
2099 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
2100 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2101 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
2102 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
2103 define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
2104   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2105   %tid.ext = sext i32 %tid to i64
2106   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2107   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2108   %a = load volatile float, float addrspace(1)* %a.gep
2109   %trunc = call float @llvm.canonicalize.f32(float %a)
2110   %fneg = fsub float -0.0, %trunc
2111   store float %fneg, float addrspace(1)* %out.gep
2112   ret void
2113 }
2114
2115 ; --------------------------------------------------------------------------------
2116 ; vintrp tests
2117 ; --------------------------------------------------------------------------------
2118
2119 ; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
2120 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2121 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2122 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2123 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2124 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2125 define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2126   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2127   %tid.ext = sext i32 %tid to i64
2128   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2129   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2130   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2131   %a = load volatile float, float addrspace(1)* %a.gep
2132   %b = load volatile float, float addrspace(1)* %b.gep
2133   %mul = fmul float %a, %b
2134   %fneg = fsub float -0.0, %mul
2135   %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2136   %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2137   store volatile float %intrp0, float addrspace(1)* %out.gep
2138   store volatile float %intrp1, float addrspace(1)* %out.gep
2139   ret void
2140 }
2141
2142 ; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
2143 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2144 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2145 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2146 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2147 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
2148 define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
2149   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2150   %tid.ext = sext i32 %tid to i64
2151   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2152   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2153   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2154   %a = load volatile float, float addrspace(1)* %a.gep
2155   %b = load volatile float, float addrspace(1)* %b.gep
2156   %mul = fmul float %a, %b
2157   %fneg = fsub float -0.0, %mul
2158   %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2159   %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2160   store volatile float %intrp0, float addrspace(1)* %out.gep
2161   store volatile float %intrp1, float addrspace(1)* %out.gep
2162   ret void
2163 }
2164
2165 ; --------------------------------------------------------------------------------
2166 ; CopyToReg tests
2167 ; --------------------------------------------------------------------------------
2168
2169 ; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2170 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2171 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2172 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2173 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2174 ; GCN: s_cbranch_scc0
2175
2176 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2177 ; GCN: s_endpgm
2178
2179 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2180 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2181 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2182
2183 define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2184   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2185   %tid.ext = sext i32 %tid to i64
2186   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2187   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2188   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2189   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2190   %a = load volatile float, float addrspace(1)* %a.gep
2191   %b = load volatile float, float addrspace(1)* %b.gep
2192   %c = load volatile float, float addrspace(1)* %c.gep
2193   %mul = fmul float %a, %b
2194   %fneg = fsub float -0.0, %mul
2195   %cmp0 = icmp eq i32 %d, 0
2196   br i1 %cmp0, label %if, label %endif
2197
2198 if:
2199   %mul1 = fmul float %fneg, %c
2200   store volatile float %mul1, float addrspace(1)* %out.gep
2201   br label %endif
2202
2203 endif:
2204   store volatile float %mul, float addrspace(1)* %out.gep
2205   ret void
2206 }
2207
2208 ; --------------------------------------------------------------------------------
2209 ; inlineasm tests
2210 ; --------------------------------------------------------------------------------
2211
2212 ; Can't fold into use, so should fold into source
2213 ; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2214 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2215 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2216 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2217 ; GCN: ; use [[MUL]]
2218 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2219 define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2220   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2221   %tid.ext = sext i32 %tid to i64
2222   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2223   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2224   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2225   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2226   %a = load volatile float, float addrspace(1)* %a.gep
2227   %b = load volatile float, float addrspace(1)* %b.gep
2228   %c = load volatile float, float addrspace(1)* %c.gep
2229   %mul = fmul float %a, %b
2230   %fneg = fsub float -0.0, %mul
2231   call void asm sideeffect "; use $0", "v"(float %fneg) #0
2232   store volatile float %fneg, float addrspace(1)* %out.gep
2233   ret void
2234 }
2235
2236 ; --------------------------------------------------------------------------------
2237 ; inlineasm tests
2238 ; --------------------------------------------------------------------------------
2239
2240 ; Can't fold into use, so should fold into source
2241 ; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2242 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2243 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2244 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2245 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2246 ; GCN: ; use [[NEG]]
2247 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2248 define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2249   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2250   %tid.ext = sext i32 %tid to i64
2251   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2252   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2253   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2254   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2255   %a = load volatile float, float addrspace(1)* %a.gep
2256   %b = load volatile float, float addrspace(1)* %b.gep
2257   %c = load volatile float, float addrspace(1)* %c.gep
2258   %mul = fmul float %a, %b
2259   %fneg = fsub float -0.0, %mul
2260   call void asm sideeffect "; use $0", "v"(float %fneg) #0
2261   store volatile float %mul, float addrspace(1)* %out.gep
2262   ret void
2263 }
2264
2265 ; --------------------------------------------------------------------------------
2266 ; code size regression tests
2267 ; --------------------------------------------------------------------------------
2268
2269 ; There are multiple users of the fneg that must use a VOP3
2270 ; instruction, so there is no penalty
2271 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2272 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2273 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2274 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2275
2276 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2277 ; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2278
2279 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2280 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
2281 define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2282   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2283   %tid.ext = sext i32 %tid to i64
2284   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2285   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2286   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2287   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2288   %a = load volatile float, float addrspace(1)* %a.gep
2289   %b = load volatile float, float addrspace(1)* %b.gep
2290   %c = load volatile float, float addrspace(1)* %c.gep
2291
2292   %fneg.a = fsub float -0.0, %a
2293   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2294   %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2295
2296   store volatile float %fma0, float addrspace(1)* %out
2297   store volatile float %fma1, float addrspace(1)* %out
2298   ret void
2299 }
2300
2301 ; There are multiple users, but both require using a larger encoding
2302 ; for the modifier.
2303
2304 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2305 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2306 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2307 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2308
2309 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2310 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2311 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2312 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2313 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2314   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2315   %tid.ext = sext i32 %tid to i64
2316   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2317   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2318   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2319   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2320   %a = load volatile float, float addrspace(1)* %a.gep
2321   %b = load volatile float, float addrspace(1)* %b.gep
2322   %c = load volatile float, float addrspace(1)* %c.gep
2323
2324   %fneg.a = fsub float -0.0, %a
2325   %mul0 = fmul float %fneg.a, %b
2326   %mul1 = fmul float %fneg.a, %c
2327
2328   store volatile float %mul0, float addrspace(1)* %out
2329   store volatile float %mul1, float addrspace(1)* %out
2330   ret void
2331 }
2332
2333 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2334 ; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2335 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2336 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2337 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2338
2339 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2340 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2341
2342 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2343 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2344 define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2345   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2346   %tid.ext = sext i32 %tid to i64
2347   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2348   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2349   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2350   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2351   %a = load volatile float, float addrspace(1)* %a.gep
2352   %b = load volatile float, float addrspace(1)* %b.gep
2353   %c = load volatile float, float addrspace(1)* %c.gep
2354
2355   %fneg.a = fsub float -0.0, %a
2356   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2357   %mul1 = fmul float %fneg.a, %c
2358
2359   store volatile float %fma0, float addrspace(1)* %out
2360   store volatile float %mul1, float addrspace(1)* %out
2361   ret void
2362 }
2363
2364 ; The use of the fneg requires a code size increase, but folding into
2365 ; the source does not
2366
2367 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2368 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2369 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2370 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2371 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2372
2373 ; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2374 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2375 ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2376
2377 ; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2378 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2379 ; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2380
2381 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2382 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
2383 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2384   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2385   %tid.ext = sext i32 %tid to i64
2386   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2387   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2388   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2389   %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2390   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2391   %a = load volatile float, float addrspace(1)* %a.gep
2392   %b = load volatile float, float addrspace(1)* %b.gep
2393   %c = load volatile float, float addrspace(1)* %c.gep
2394   %d = load volatile float, float addrspace(1)* %d.gep
2395
2396   %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2397   %fneg.fma0 = fsub float -0.0, %fma0
2398   %mul1 = fmul float %fneg.fma0, %c
2399   %mul2 = fmul float %fneg.fma0, %d
2400
2401   store volatile float %mul1, float addrspace(1)* %out
2402   store volatile float %mul2, float addrspace(1)* %out
2403   ret void
2404 }
2405
2406 ; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2407 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2408 ; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2409 ; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2410 ; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2411
2412 ; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2413 ; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2414 ; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2415
2416 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2417 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2418 define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
2419   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2420   %tid.ext = sext i32 %tid to i64
2421   %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2422   %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2423   %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2424   %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2425   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2426   %a = load volatile double, double addrspace(1)* %a.gep
2427   %b = load volatile double, double addrspace(1)* %b.gep
2428   %c = load volatile double, double addrspace(1)* %c.gep
2429   %d = load volatile double, double addrspace(1)* %d.gep
2430
2431   %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2432   %fneg.fma0 = fsub double -0.0, %fma0
2433   %mul1 = fmul double %fneg.fma0, %c
2434   %mul2 = fmul double %fneg.fma0, %d
2435
2436   store volatile double %mul1, double addrspace(1)* %out
2437   store volatile double %mul2, double addrspace(1)* %out
2438   ret void
2439 }
2440
2441 ; %trunc.a has one fneg use, but it requires a code size increase and
2442 ; %the fneg can instead be folded for free into the fma.
2443
2444 ; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2445 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2446 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2447 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2448 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2449 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2450 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2451 define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2452   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2453   %tid.ext = sext i32 %tid to i64
2454   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2455   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2456   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2457   %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2458   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2459   %a = load volatile float, float addrspace(1)* %a.gep
2460   %b = load volatile float, float addrspace(1)* %b.gep
2461   %c = load volatile float, float addrspace(1)* %c.gep
2462   %d = load volatile float, float addrspace(1)* %d.gep
2463
2464   %trunc.a = call float @llvm.trunc.f32(float %a)
2465   %trunc.fneg.a = fsub float -0.0, %trunc.a
2466   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2467   store volatile float %fma0, float addrspace(1)* %out
2468   ret void
2469 }
2470
2471 ; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2472 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2473 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2474 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2475 ; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2476 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2477 ; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2478 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2479 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2480 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2481 define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2482   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2483   %tid.ext = sext i32 %tid to i64
2484   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2485   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2486   %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2487   %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2488   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2489   %a = load volatile float, float addrspace(1)* %a.gep
2490   %b = load volatile float, float addrspace(1)* %b.gep
2491   %c = load volatile float, float addrspace(1)* %c.gep
2492   %d = load volatile float, float addrspace(1)* %d.gep
2493
2494   %trunc.a = call float @llvm.trunc.f32(float %a)
2495   %trunc.fneg.a = fsub float -0.0, %trunc.a
2496   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2497   %mul1 = fmul float %trunc.a, %d
2498   store volatile float %fma0, float addrspace(1)* %out
2499   store volatile float %mul1, float addrspace(1)* %out
2500   ret void
2501 }
2502
2503 declare i32 @llvm.amdgcn.workitem.id.x() #1
2504 declare float @llvm.fma.f32(float, float, float) #1
2505 declare float @llvm.fmuladd.f32(float, float, float) #1
2506 declare float @llvm.sin.f32(float) #1
2507 declare float @llvm.trunc.f32(float) #1
2508 declare float @llvm.round.f32(float) #1
2509 declare float @llvm.rint.f32(float) #1
2510 declare float @llvm.nearbyint.f32(float) #1
2511 declare float @llvm.canonicalize.f32(float) #1
2512 declare float @llvm.minnum.f32(float, float) #1
2513 declare float @llvm.maxnum.f32(float, float) #1
2514 declare half @llvm.minnum.f16(half, half) #1
2515 declare double @llvm.minnum.f64(double, double) #1
2516 declare double @llvm.fma.f64(double, double, double) #1
2517
2518 declare float @llvm.amdgcn.sin.f32(float) #1
2519 declare float @llvm.amdgcn.rcp.f32(float) #1
2520 declare float @llvm.amdgcn.rcp.legacy(float) #1
2521 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2522 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2523 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2524
2525 attributes #0 = { nounwind }
2526 attributes #1 = { nounwind readnone }