llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
   3 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
   4
   5 ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
   6 ; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
   7
   8 ; --------------------------------------------------------------------------------
   9 ; fadd tests
  10 ; --------------------------------------------------------------------------------
  11
  12 define float @v_fneg_add_f32(float %a, float %b) #0 {
  13 ; GCN-SAFE-LABEL: v_fneg_add_f32:
  14 ; GCN-SAFE:       ; %bb.0:
  15 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  16 ; GCN-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
  17 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
  18 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
  19 ;
  20 ; GCN-NSZ-LABEL: v_fneg_add_f32:
  21 ; GCN-NSZ:       ; %bb.0:
  22 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  23 ; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v0, v1
  24 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
  25   %add = fadd float %a, %b
  26   %fneg = fneg float %add
  27   ret float %fneg
  28 }
  29
  30 define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) #0 {
  31 ; GCN-LABEL: v_fneg_add_store_use_add_f32:
  32 ; GCN:       ; %bb.0:
  33 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  34 ; GCN-NEXT:    v_add_f32_e32 v1, v0, v1
  35 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
  36 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  37   %add = fadd float %a, %b
  38   %fneg = fneg float %add
  39   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  40   %insert.1 = insertvalue { float, float } %insert.0, float %add, 1
  41   ret { float, float } %insert.1
  42 }
  43
  44 define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) #0 {
  45 ; GCN-SAFE-LABEL: v_fneg_add_multi_use_add_f32:
  46 ; GCN-SAFE:       ; %bb.0:
  47 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  48 ; GCN-SAFE-NEXT:    v_add_f32_e32 v1, v0, v1
  49 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
  50 ; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
  51 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
  52 ;
  53 ; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f32:
  54 ; GCN-NSZ:       ; %bb.0:
  55 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  56 ; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v0, v1
  57 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
  58 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
  59   %add = fadd float %a, %b
  60   %fneg = fneg float %add
  61   %use1 = fmul float %add, 4.0
  62
  63   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  64   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
  65   ret { float, float } %insert.1
  66 }
  67
  68 define float @v_fneg_add_fneg_x_f32(float %a, float %b) #0 {
  69 ; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f32:
  70 ; GCN-SAFE:       ; %bb.0:
  71 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  72 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v0
  73 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
  74 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
  75 ;
  76 ; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f32:
  77 ; GCN-NSZ:       ; %bb.0:
  78 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  79 ; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
  80 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
  81   %fneg.a = fneg float %a
  82   %add = fadd float %fneg.a, %b
  83   %fneg = fneg float %add
  84   ret float %fneg
  85 }
  86
  87 define float @v_fneg_add_x_fneg_f32(float %a, float %b) #0 {
  88 ; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f32:
  89 ; GCN-SAFE:       ; %bb.0:
  90 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  91 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v0, v1
  92 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
  93 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
  94 ;
  95 ; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f32:
  96 ; GCN-NSZ:       ; %bb.0:
  97 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  98 ; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v1, v0
  99 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 100   %fneg.b = fneg float %b
 101   %add = fadd float %a, %fneg.b
 102   %fneg = fneg float %add
 103   ret float %fneg
 104 }
 105
 106 define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) #0 {
 107 ; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f32:
 108 ; GCN-SAFE:       ; %bb.0:
 109 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 110 ; GCN-SAFE-NEXT:    v_sub_f32_e64 v0, -v0, v1
 111 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 112 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 113 ;
 114 ; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f32:
 115 ; GCN-NSZ:       ; %bb.0:
 116 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 117 ; GCN-NSZ-NEXT:    v_add_f32_e32 v0, v0, v1
 118 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 119   %fneg.a = fneg float %a
 120   %fneg.b = fneg float %b
 121   %add = fadd float %fneg.a, %fneg.b
 122   %fneg = fneg float %add
 123   ret float %fneg
 124 }
 125
 126 define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) #0 {
 127 ; GCN-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f32:
 128 ; GCN-SAFE:       ; %bb.0:
 129 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 130 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 131 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v0
 132 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 133 ; GCN-SAFE-NEXT:    v_mov_b32_e32 v1, v2
 134 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 135 ;
 136 ; GCN-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f32:
 137 ; GCN-NSZ:       ; %bb.0:
 138 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 139 ; GCN-NSZ-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 140 ; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
 141 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v2
 142 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 143   %fneg.a = fneg float %a
 144   %add = fadd float %fneg.a, %b
 145   %fneg = fneg float %add
 146   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 147   %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
 148   ret { float, float } %insert.1
 149 }
 150
 151 define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
 152 ; GCN-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f32:
 153 ; GCN-SAFE:       ; %bb.0:
 154 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 155 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v1, v1, v0
 156 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
 157 ; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v0, v2
 158 ; GCN-SAFE-NEXT:    v_mov_b32_e32 v0, v3
 159 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 160 ;
 161 ; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f32:
 162 ; GCN-NSZ:       ; %bb.0:
 163 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 164 ; GCN-NSZ-NEXT:    v_sub_f32_e32 v3, v0, v1
 165 ; GCN-NSZ-NEXT:    v_mul_f32_e64 v1, -v0, v2
 166 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v3
 167 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 168   %fneg.a = fneg float %a
 169   %add = fadd float %fneg.a, %b
 170   %fneg = fneg float %add
 171   %use1 = fmul float %fneg.a, %c
 172
 173   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 174   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
 175   ret { float, float } %insert.1
 176 }
 177
 178 ; This one asserted with -enable-no-signed-zeros-fp-math
 179 define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 {
 180 ; SI-SAFE-LABEL: fneg_fadd_0_f32:
 181 ; SI-SAFE:       ; %bb.0: ; %.entry
 182 ; SI-SAFE-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
 183 ; SI-SAFE-NEXT:    v_rcp_f32_e32 v1, v0
 184 ; SI-SAFE-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
 185 ; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 186 ; SI-SAFE-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
 187 ; SI-SAFE-NEXT:    v_fma_f32 v1, v3, v1, v1
 188 ; SI-SAFE-NEXT:    v_mul_f32_e32 v3, v2, v1
 189 ; SI-SAFE-NEXT:    v_fma_f32 v4, -v0, v3, v2
 190 ; SI-SAFE-NEXT:    v_fma_f32 v3, v4, v1, v3
 191 ; SI-SAFE-NEXT:    v_fma_f32 v0, -v0, v3, v2
 192 ; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 193 ; SI-SAFE-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
 194 ; SI-SAFE-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
 195 ; SI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
 196 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
 197 ; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 198 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 199 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 200 ; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 201 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 202 ; SI-SAFE-NEXT:    ; return to shader part epilog
 203 ;
 204 ; SI-NSZ-LABEL: fneg_fadd_0_f32:
 205 ; SI-NSZ:       ; %bb.0: ; %.entry
 206 ; SI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
 207 ; SI-NSZ-NEXT:    v_rcp_f32_e32 v1, v0
 208 ; SI-NSZ-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
 209 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 210 ; SI-NSZ-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
 211 ; SI-NSZ-NEXT:    v_fma_f32 v1, v3, v1, v1
 212 ; SI-NSZ-NEXT:    v_mul_f32_e32 v3, v2, v1
 213 ; SI-NSZ-NEXT:    v_fma_f32 v4, -v0, v3, v2
 214 ; SI-NSZ-NEXT:    v_fma_f32 v3, v4, v1, v3
 215 ; SI-NSZ-NEXT:    v_fma_f32 v0, -v0, v3, v2
 216 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 217 ; SI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
 218 ; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
 219 ; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 220 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, s0
 221 ; SI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 222 ; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 223 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 224 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 225 ; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 226 ; SI-NSZ-NEXT:    ; return to shader part epilog
 227 ;
 228 ; VI-SAFE-LABEL: fneg_fadd_0_f32:
 229 ; VI-SAFE:       ; %bb.0: ; %.entry
 230 ; VI-SAFE-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
 231 ; VI-SAFE-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
 232 ; VI-SAFE-NEXT:    v_rcp_f32_e32 v2, v0
 233 ; VI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 234 ; VI-SAFE-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
 235 ; VI-SAFE-NEXT:    v_fma_f32 v2, v3, v2, v2
 236 ; VI-SAFE-NEXT:    v_mul_f32_e32 v3, v1, v2
 237 ; VI-SAFE-NEXT:    v_fma_f32 v4, -v0, v3, v1
 238 ; VI-SAFE-NEXT:    v_fma_f32 v3, v4, v2, v3
 239 ; VI-SAFE-NEXT:    v_fma_f32 v0, -v0, v3, v1
 240 ; VI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 241 ; VI-SAFE-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
 242 ; VI-SAFE-NEXT:    v_mov_b32_e32 v2, s0
 243 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 244 ; VI-SAFE-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
 245 ; VI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
 246 ; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 247 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
 248 ; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 249 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 250 ; VI-SAFE-NEXT:    ; return to shader part epilog
 251 ;
 252 ; VI-NSZ-LABEL: fneg_fadd_0_f32:
 253 ; VI-NSZ:       ; %bb.0: ; %.entry
 254 ; VI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
 255 ; VI-NSZ-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
 256 ; VI-NSZ-NEXT:    v_rcp_f32_e32 v2, v0
 257 ; VI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 258 ; VI-NSZ-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
 259 ; VI-NSZ-NEXT:    v_fma_f32 v2, v3, v2, v2
 260 ; VI-NSZ-NEXT:    v_mul_f32_e32 v3, v1, v2
 261 ; VI-NSZ-NEXT:    v_fma_f32 v4, -v0, v3, v1
 262 ; VI-NSZ-NEXT:    v_fma_f32 v3, v4, v2, v3
 263 ; VI-NSZ-NEXT:    v_fma_f32 v0, -v0, v3, v1
 264 ; VI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 265 ; VI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
 266 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s0
 267 ; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 268 ; VI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
 269 ; VI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 270 ; VI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 271 ; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
 272 ; VI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 273 ; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 274 ; VI-NSZ-NEXT:    ; return to shader part epilog
 275 .entry:
 276   %tmp7 = fdiv float 1.000000e+00, %tmp6
 277   %tmp8 = fmul float 0.000000e+00, %tmp7
 278   %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
 279   %.i188 = fadd float %tmp9, 0.000000e+00
 280   %tmp10 = fcmp uge float %.i188, %tmp2
 281   %tmp11 = fneg float %.i188
 282   %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
 283   %tmp12 = fcmp ule float %.i092, 0.000000e+00
 284   %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
 285   ret float %.i198
 286 }
 287
 288 ; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
 289 ; function attribute unsafe-fp-math automatically. Combine with the previous test
 290 ; when that is done.
 291 define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
 292 ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
 293 ; SI-SAFE:       ; %bb.0: ; %.entry
 294 ; SI-SAFE-NEXT:    v_min_legacy_f32_e64 v0, 0, s0
 295 ; SI-SAFE-NEXT:    s_brev_b32 s0, 1
 296 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 297 ; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 298 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 299 ; SI-SAFE-NEXT:    ; return to shader part epilog
 300 ;
 301 ; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32:
 302 ; GCN-NSZ:       ; %bb.0: ; %.entry
 303 ; GCN-NSZ-NEXT:    v_rcp_f32_e32 v0, s1
 304 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, s0
 305 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 306 ; GCN-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 307 ; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 308 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 309 ; GCN-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 310 ; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 311 ; GCN-NSZ-NEXT:    ; return to shader part epilog
 312 ;
 313 ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
 314 ; VI-SAFE:       ; %bb.0: ; %.entry
 315 ; VI-SAFE-NEXT:    v_rcp_f32_e32 v0, s1
 316 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
 317 ; VI-SAFE-NEXT:    v_mul_f32_e32 v0, 0, v0
 318 ; VI-SAFE-NEXT:    v_add_f32_e32 v0, 0, v0
 319 ; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 320 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 321 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 322 ; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
 323 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 324 ; VI-SAFE-NEXT:    ; return to shader part epilog
 325 .entry:
 326   %tmp7 = fdiv afn float 1.000000e+00, %tmp6
 327   %tmp8 = fmul float 0.000000e+00, %tmp7
 328   %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
 329   %.i188 = fadd float %tmp9, 0.000000e+00
 330   %tmp10 = fcmp uge float %.i188, %tmp2
 331   %tmp11 = fneg float %.i188
 332   %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
 333   %tmp12 = fcmp ule float %.i092, 0.000000e+00
 334   %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
 335   ret float %.i198
 336 }
 337
 338 define double @v_fneg_add_f64(double %a, double %b) #0 {
 339 ; GCN-SAFE-LABEL: v_fneg_add_f64:
 340 ; GCN-SAFE:       ; %bb.0:
 341 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 342 ; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 343 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 344 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 345 ;
 346 ; GCN-NSZ-LABEL: v_fneg_add_f64:
 347 ; GCN-NSZ:       ; %bb.0:
 348 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 349 ; GCN-NSZ-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
 350 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 351   %add = fadd double %a, %b
 352   %fneg = fneg double %add
 353   ret double %fneg
 354 }
 355
 356 define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) #0 {
 357 ; GCN-LABEL: v_fneg_add_store_use_add_f64:
 358 ; GCN:       ; %bb.0:
 359 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 360 ; GCN-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
 361 ; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
 362 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
 363 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 364   %add = fadd double %a, %b
 365   %fneg = fneg double %add
 366   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
 367   %insert.1 = insertvalue { double, double } %insert.0, double %add, 1
 368   ret { double, double } %insert.1
 369 }
 370
 371 define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) #0 {
 372 ; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f64:
 373 ; SI-SAFE:       ; %bb.0:
 374 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 375 ; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 376 ; SI-SAFE-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
 377 ; SI-SAFE-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
 378 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, v4
 379 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 380 ;
 381 ; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f64:
 382 ; GCN-NSZ:       ; %bb.0:
 383 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 384 ; GCN-NSZ-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
 385 ; GCN-NSZ-NEXT:    v_mul_f64 v[2:3], v[0:1], -4.0
 386 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 387 ;
 388 ; VI-SAFE-LABEL: v_fneg_add_multi_use_add_f64:
 389 ; VI-SAFE:       ; %bb.0:
 390 ; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 391 ; VI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 392 ; VI-SAFE-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
 393 ; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 394 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 395   %add = fadd double %a, %b
 396   %fneg = fneg double %add
 397   %use1 = fmul double %add, 4.0
 398
 399   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
 400   %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1
 401   ret { double, double } %insert.1
 402 }
 403
 404 define double @v_fneg_add_fneg_x_f64(double %a, double %b) #0 {
 405 ; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f64:
 406 ; GCN-SAFE:       ; %bb.0:
 407 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 408 ; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[2:3], -v[0:1]
 409 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 410 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 411 ;
 412 ; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f64:
 413 ; GCN-NSZ:       ; %bb.0:
 414 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 415 ; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
 416 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 417   %fneg.a = fneg double %a
 418   %add = fadd double %fneg.a, %b
 419   %fneg = fneg double %add
 420   ret double %fneg
 421 }
 422
 423 define double @v_fneg_add_x_fneg_f64(double %a, double %b) #0 {
 424 ; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f64:
 425 ; GCN-SAFE:       ; %bb.0:
 426 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 427 ; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
 428 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 429 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 430 ;
 431 ; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f64:
 432 ; GCN-NSZ:       ; %bb.0:
 433 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 434 ; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[2:3], -v[0:1]
 435 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 436   %fneg.b = fneg double %b
 437   %add = fadd double %a, %fneg.b
 438   %fneg = fneg double %add
 439   ret double %fneg
 440 }
 441
 442 define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) #0 {
 443 ; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f64:
 444 ; GCN-SAFE:       ; %bb.0:
 445 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 446 ; GCN-SAFE-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
 447 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 448 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 449 ;
 450 ; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f64:
 451 ; GCN-NSZ:       ; %bb.0:
 452 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 453 ; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 454 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 455   %fneg.a = fneg double %a
 456   %fneg.b = fneg double %b
 457   %add = fadd double %fneg.a, %fneg.b
 458   %fneg = fneg double %add
 459   ret double %fneg
 460 }
 461
 462 define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b) #0 {
 463 ; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64:
 464 ; SI-SAFE:       ; %bb.0:
 465 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 466 ; SI-SAFE-NEXT:    v_mov_b32_e32 v5, v1
 467 ; SI-SAFE-NEXT:    v_mov_b32_e32 v4, v0
 468 ; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[2:3], -v[4:5]
 469 ; SI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v5
 470 ; SI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 471 ; SI-SAFE-NEXT:    v_mov_b32_e32 v2, v4
 472 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 473 ;
 474 ; SI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64:
 475 ; SI-NSZ:       ; %bb.0:
 476 ; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 477 ; SI-NSZ-NEXT:    v_mov_b32_e32 v5, v1
 478 ; SI-NSZ-NEXT:    v_mov_b32_e32 v4, v0
 479 ; SI-NSZ-NEXT:    v_add_f64 v[0:1], v[4:5], -v[2:3]
 480 ; SI-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v5
 481 ; SI-NSZ-NEXT:    v_mov_b32_e32 v2, v4
 482 ; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 483 ;
 484 ; VI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64:
 485 ; VI-SAFE:       ; %bb.0:
 486 ; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 487 ; VI-SAFE-NEXT:    v_add_f64 v[4:5], v[2:3], -v[0:1]
 488 ; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
 489 ; VI-SAFE-NEXT:    v_mov_b32_e32 v2, v0
 490 ; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v5
 491 ; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v4
 492 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 493 ;
 494 ; VI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64:
 495 ; VI-NSZ:       ; %bb.0:
 496 ; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 497 ; VI-NSZ-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
 498 ; VI-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
 499 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, v0
 500 ; VI-NSZ-NEXT:    v_mov_b32_e32 v0, v4
 501 ; VI-NSZ-NEXT:    v_mov_b32_e32 v1, v5
 502 ; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 503   %fneg.a = fneg double %a
 504   %add = fadd double %fneg.a, %b
 505   %fneg = fneg double %add
 506   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
 507   %insert.1 = insertvalue { double, double } %insert.0, double %fneg.a, 1
 508   ret { double, double } %insert.1
 509 }
 510
 511 define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b, double %c) #0 {
 512 ; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64:
 513 ; SI-SAFE:       ; %bb.0:
 514 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 515 ; SI-SAFE-NEXT:    v_add_f64 v[6:7], v[2:3], -v[0:1]
 516 ; SI-SAFE-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
 517 ; SI-SAFE-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
 518 ; SI-SAFE-NEXT:    v_mov_b32_e32 v0, v6
 519 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, v7
 520 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 521 ;
 522 ; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f64:
 523 ; GCN-NSZ:       ; %bb.0:
 524 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 525 ; GCN-NSZ-NEXT:    v_add_f64 v[6:7], v[0:1], -v[2:3]
 526 ; GCN-NSZ-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
 527 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v6
 528 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v7
 529 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
 530 ;
 531 ; VI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64:
 532 ; VI-SAFE:       ; %bb.0:
 533 ; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 534 ; VI-SAFE-NEXT:    v_add_f64 v[6:7], v[2:3], -v[0:1]
 535 ; VI-SAFE-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
 536 ; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v7
 537 ; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v6
 538 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 539   %fneg.a = fneg double %a
 540   %add = fadd double %fneg.a, %b
 541   %fneg = fneg double %add
 542   %use1 = fmul double %fneg.a, %c
 543
 544   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
 545   %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1
 546   ret { double, double } %insert.1
 547 }
 548
 549 ; This one asserted with -enable-no-signed-zeros-fp-math
 550 define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #0 {
 551 ; SI-SAFE-LABEL: fneg_fadd_0_f64:
 552 ; SI-SAFE:       ; %bb.0: ; %.entry
 553 ; SI-SAFE-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
 554 ; SI-SAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 555 ; SI-SAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 556 ; SI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
 557 ; SI-SAFE-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
 558 ; SI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
 559 ; SI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 560 ; SI-SAFE-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
 561 ; SI-SAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
 562 ; SI-SAFE-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
 563 ; SI-SAFE-NEXT:    v_mov_b32_e32 v2, s1
 564 ; SI-SAFE-NEXT:    v_mov_b32_e32 v3, s0
 565 ; SI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
 566 ; SI-SAFE-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
 567 ; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], 0
 568 ; SI-SAFE-NEXT:    v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
 569 ; SI-SAFE-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
 570 ; SI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
 571 ; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 572 ; SI-SAFE-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 573 ; SI-SAFE-NEXT:    s_and_b64 s[0:1], vcc, exec
 574 ; SI-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 575 ; SI-SAFE-NEXT:    s_mov_b32 s0, 0
 576 ; SI-SAFE-NEXT:    ; return to shader part epilog
 577 ;
 578 ; SI-NSZ-LABEL: fneg_fadd_0_f64:
 579 ; SI-NSZ:       ; %bb.0: ; %.entry
 580 ; SI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
 581 ; SI-NSZ-NEXT:    s_mov_b32 s4, 0
 582 ; SI-NSZ-NEXT:    s_brev_b32 s5, 1
 583 ; SI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 584 ; SI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 585 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
 586 ; SI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
 587 ; SI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
 588 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 589 ; SI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
 590 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
 591 ; SI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
 592 ; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
 593 ; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
 594 ; SI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
 595 ; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[4:5]
 596 ; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 597 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 598 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 599 ; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 600 ; SI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
 601 ; SI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 602 ; SI-NSZ-NEXT:    s_mov_b32 s0, 0
 603 ; SI-NSZ-NEXT:    ; return to shader part epilog
 604 ;
 605 ; VI-SAFE-LABEL: fneg_fadd_0_f64:
 606 ; VI-SAFE:       ; %bb.0: ; %.entry
 607 ; VI-SAFE-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
 608 ; VI-SAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 609 ; VI-SAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 610 ; VI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
 611 ; VI-SAFE-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
 612 ; VI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
 613 ; VI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 614 ; VI-SAFE-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
 615 ; VI-SAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
 616 ; VI-SAFE-NEXT:    v_mov_b32_e32 v4, s0
 617 ; VI-SAFE-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
 618 ; VI-SAFE-NEXT:    v_mov_b32_e32 v2, s1
 619 ; VI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
 620 ; VI-SAFE-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
 621 ; VI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], 0
 622 ; VI-SAFE-NEXT:    v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
 623 ; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
 624 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 625 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 626 ; VI-SAFE-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 627 ; VI-SAFE-NEXT:    s_and_b64 s[0:1], vcc, exec
 628 ; VI-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 629 ; VI-SAFE-NEXT:    s_mov_b32 s0, 0
 630 ; VI-SAFE-NEXT:    ; return to shader part epilog
 631 ;
 632 ; VI-NSZ-LABEL: fneg_fadd_0_f64:
 633 ; VI-NSZ:       ; %bb.0: ; %.entry
 634 ; VI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
 635 ; VI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 636 ; VI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 637 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
 638 ; VI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
 639 ; VI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
 640 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 641 ; VI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
 642 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
 643 ; VI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
 644 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
 645 ; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
 646 ; VI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
 647 ; VI-NSZ-NEXT:    s_mov_b32 s2, 0
 648 ; VI-NSZ-NEXT:    s_brev_b32 s3, 1
 649 ; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
 650 ; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 651 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 652 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 653 ; VI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 654 ; VI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
 655 ; VI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 656 ; VI-NSZ-NEXT:    s_mov_b32 s0, 0
 657 ; VI-NSZ-NEXT:    ; return to shader part epilog
 658 .entry:
 659   %tmp7 = fdiv double 1.000000e+00, %tmp6
 660   %tmp8 = fmul double 0.000000e+00, %tmp7
 661   %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8
 662   %.i188 = fadd double %tmp9, 0.000000e+00
 663   %tmp10 = fcmp uge double %.i188, %tmp2
 664   %tmp11 = fneg double %.i188
 665   %.i092 = select i1 %tmp10, double %tmp2, double %tmp11
 666   %tmp12 = fcmp ule double %.i092, 0.000000e+00
 667   %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000
 668   ret double %.i198
 669 }
 670
 671 ; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
 672 ; function attribute unsafe-fp-math automatically. Combine with the previous test
 673 ; when that is done.
 674 define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 {
 675 ; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f64:
 676 ; GCN-SAFE:       ; %bb.0: ; %.entry
 677 ; GCN-SAFE-NEXT:    v_cmp_ngt_f64_e64 s[2:3], s[0:1], 0
 678 ; GCN-SAFE-NEXT:    s_and_b64 s[2:3], s[2:3], exec
 679 ; GCN-SAFE-NEXT:    s_cselect_b32 s1, s1, 0x80000000
 680 ; GCN-SAFE-NEXT:    s_cselect_b32 s0, s0, 0
 681 ; GCN-SAFE-NEXT:    v_cmp_ngt_f64_e64 s[0:1], s[0:1], 0
 682 ; GCN-SAFE-NEXT:    s_and_b64 s[0:1], s[0:1], exec
 683 ; GCN-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 684 ; GCN-SAFE-NEXT:    s_mov_b32 s0, 0
 685 ; GCN-SAFE-NEXT:    ; return to shader part epilog
 686 ;
 687 ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
 688 ; SI-NSZ:       ; %bb.0: ; %.entry
 689 ; SI-NSZ-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
 690 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 691 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 692 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 693 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 694 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 695 ; SI-NSZ-NEXT:    s_mov_b32 s2, 0
 696 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 697 ; SI-NSZ-NEXT:    s_brev_b32 s3, 1
 698 ; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
 699 ; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
 700 ; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 701 ; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
 702 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 703 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 704 ; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 705 ; SI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
 706 ; SI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 707 ; SI-NSZ-NEXT:    s_mov_b32 s0, 0
 708 ; SI-NSZ-NEXT:    ; return to shader part epilog
 709 ;
 710 ; VI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
 711 ; VI-NSZ:       ; %bb.0: ; %.entry
 712 ; VI-NSZ-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
 713 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 714 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 715 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 716 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 717 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 718 ; VI-NSZ-NEXT:    s_mov_b32 s2, 0
 719 ; VI-NSZ-NEXT:    s_brev_b32 s3, 1
 720 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 721 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
 722 ; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
 723 ; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
 724 ; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 725 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 726 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 727 ; VI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
 728 ; VI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
 729 ; VI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 730 ; VI-NSZ-NEXT:    s_mov_b32 s0, 0
 731 ; VI-NSZ-NEXT:    ; return to shader part epilog
 732 .entry:
 733   %tmp7 = fdiv afn double 1.000000e+00, %tmp6
 734   %tmp8 = fmul double 0.000000e+00, %tmp7
 735   %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8
 736   %.i188 = fadd double %tmp9, 0.000000e+00
 737   %tmp10 = fcmp uge double %.i188, %tmp2
 738   %tmp11 = fneg double %.i188
 739   %.i092 = select i1 %tmp10, double %tmp2, double %tmp11
 740   %tmp12 = fcmp ule double %.i092, 0.000000e+00
 741   %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000
 742   ret double %.i198
 743 }
 744
 745 ; --------------------------------------------------------------------------------
 746 ; fmul tests
 747 ; --------------------------------------------------------------------------------
 748
 749 define float @v_fneg_mul_f32(float %a, float %b) #0 {
 750 ; GCN-LABEL: v_fneg_mul_f32:
 751 ; GCN:       ; %bb.0:
 752 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 753 ; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
 754 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 755   %mul = fmul float %a, %b
 756   %fneg = fneg float %mul
 757   ret float %fneg
 758 }
 759
 760 define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) #0 {
 761 ; GCN-LABEL: v_fneg_mul_store_use_mul_f32:
 762 ; GCN:       ; %bb.0:
 763 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 764 ; GCN-NEXT:    v_mul_f32_e32 v1, v0, v1
 765 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
 766 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 767   %mul = fmul float %a, %b
 768   %fneg = fneg float %mul
 769   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 770   %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
 771   ret { float, float } %insert.1
 772 }
 773
 774 define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) #0 {
 775 ; GCN-LABEL: v_fneg_mul_multi_use_mul_f32:
 776 ; GCN:       ; %bb.0:
 777 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 778 ; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
 779 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 780 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 781   %mul = fmul float %a, %b
 782   %fneg = fneg float %mul
 783   %use1 = fmul float %mul, 4.0
 784   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 785   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
 786   ret { float, float } %insert.1
 787 }
 788
 789 define float @v_fneg_mul_fneg_x_f32(float %a, float %b) #0 {
 790 ; GCN-LABEL: v_fneg_mul_fneg_x_f32:
 791 ; GCN:       ; %bb.0:
 792 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 793 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 794 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 795   %fneg.a = fneg float %a
 796   %mul = fmul float %fneg.a, %b
 797   %fneg = fneg float %mul
 798   ret float %fneg
 799 }
 800
 801 define float @v_fneg_mul_x_fneg_f32(float %a, float %b) #0 {
 802 ; GCN-LABEL: v_fneg_mul_x_fneg_f32:
 803 ; GCN:       ; %bb.0:
 804 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 805 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 806 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 807   %fneg.b = fneg float %b
 808   %mul = fmul float %a, %fneg.b
 809   %fneg = fneg float %mul
 810   ret float %fneg
 811 }
 812
 813 define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) #0 {
 814 ; GCN-LABEL: v_fneg_mul_fneg_fneg_f32:
 815 ; GCN:       ; %bb.0:
 816 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 817 ; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
 818 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 819   %fneg.a = fneg float %a
 820   %fneg.b = fneg float %b
 821   %mul = fmul float %fneg.a, %fneg.b
 822   %fneg = fneg float %mul
 823   ret float %fneg
 824 }
 825
 826 define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) #0 {
 827 ; GCN-LABEL: v_fneg_mul_store_use_fneg_x_f32:
 828 ; GCN:       ; %bb.0:
 829 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 830 ; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 831 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 832 ; GCN-NEXT:    v_mov_b32_e32 v1, v2
 833 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 834   %fneg.a = fneg float %a
 835   %mul = fmul float %fneg.a, %b
 836   %fneg = fneg float %mul
 837   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 838   %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
 839   ret { float, float } %insert.1
 840 }
 841
 842 define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
 843 ; GCN-LABEL: v_fneg_mul_multi_use_fneg_x_f32:
 844 ; GCN:       ; %bb.0:
 845 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 846 ; GCN-NEXT:    v_mul_f32_e32 v3, v0, v1
 847 ; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
 848 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
 849 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 850   %fneg.a = fneg float %a
 851   %mul = fmul float %fneg.a, %b
 852   %fneg = fneg float %mul
 853   %use1 = fmul float %fneg.a, %c
 854   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
 855   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
 856   ret { float, float } %insert.1
 857 }
 858
 859 ; --------------------------------------------------------------------------------
 860 ; fminnum tests
 861 ; --------------------------------------------------------------------------------
 862
 863 define float @v_fneg_minnum_f32_ieee(float %a, float %b) #0 {
 864 ; GCN-LABEL: v_fneg_minnum_f32_ieee:
 865 ; GCN:       ; %bb.0:
 866 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 867 ; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
 868 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
 869 ; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
 870 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 871   %min = call float @llvm.minnum.f32(float %a, float %b)
 872   %fneg = fneg float %min
 873   ret float %fneg
 874 }
 875
 876 define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #4 {
 877 ; GCN-LABEL: v_fneg_minnum_f32_no_ieee:
 878 ; GCN:       ; %bb.0:
 879 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 880 ; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 881 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 882   %min = call float @llvm.minnum.f32(float %a, float %b)
 883   %fneg = fneg float %min
 884   ret float %fneg
 885 }
 886
 887 define float @v_fneg_self_minnum_f32_ieee(float %a) #0 {
 888 ; GCN-LABEL: v_fneg_self_minnum_f32_ieee:
 889 ; GCN:       ; %bb.0:
 890 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 891 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 892 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 893   %min = call float @llvm.minnum.f32(float %a, float %a)
 894   %min.fneg = fneg float %min
 895   ret float %min.fneg
 896 }
 897
 898 define float @v_fneg_self_minnum_f32_no_ieee(float %a) #4 {
 899 ; GCN-LABEL: v_fneg_self_minnum_f32_no_ieee:
 900 ; GCN:       ; %bb.0:
 901 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 902 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 903 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 904   %min = call float @llvm.minnum.f32(float %a, float %a)
 905   %min.fneg = fneg float %min
 906   ret float %min.fneg
 907 }
 908
 909 define float @v_fneg_posk_minnum_f32_ieee(float %a) #0 {
 910 ; GCN-LABEL: v_fneg_posk_minnum_f32_ieee:
 911 ; GCN:       ; %bb.0:
 912 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 913 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
 914 ; GCN-NEXT:    v_max_f32_e32 v0, -4.0, v0
 915 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 916   %min = call float @llvm.minnum.f32(float 4.0, float %a)
 917   %fneg = fneg float %min
 918   ret float %fneg
 919 }
 920
 921 define float @v_fneg_posk_minnum_f32_no_ieee(float %a) #4 {
 922 ; GCN-LABEL: v_fneg_posk_minnum_f32_no_ieee:
 923 ; GCN:       ; %bb.0:
 924 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 925 ; GCN-NEXT:    v_max_f32_e64 v0, -v0, -4.0
 926 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 927   %min = call float @llvm.minnum.f32(float 4.0, float %a)
 928   %fneg = fneg float %min
 929   ret float %fneg
 930 }
 931
 932 define float @v_fneg_negk_minnum_f32_ieee(float %a) #0 {
 933 ; GCN-LABEL: v_fneg_negk_minnum_f32_ieee:
 934 ; GCN:       ; %bb.0:
 935 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 936 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
 937 ; GCN-NEXT:    v_max_f32_e32 v0, 4.0, v0
 938 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 939   %min = call float @llvm.minnum.f32(float -4.0, float %a)
 940   %fneg = fneg float %min
 941   ret float %fneg
 942 }
 943
 944 define float @v_fneg_negk_minnum_f32_no_ieee(float %a) #4 {
 945 ; GCN-LABEL: v_fneg_negk_minnum_f32_no_ieee:
 946 ; GCN:       ; %bb.0:
 947 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 948 ; GCN-NEXT:    v_max_f32_e64 v0, -v0, 4.0
 949 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 950   %min = call float @llvm.minnum.f32(float -4.0, float %a)
 951   %fneg = fneg float %min
 952   ret float %fneg
 953 }
 954
 955 define float @v_fneg_0_minnum_f32(float %a) #0 {
 956 ; GCN-LABEL: v_fneg_0_minnum_f32:
 957 ; GCN:       ; %bb.0:
 958 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 959 ; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
 960 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 961 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 962   %min = call nnan float @llvm.minnum.f32(float 0.0, float %a)
 963   %fneg = fneg float %min
 964   ret float %fneg
 965 }
 966
 967 define float @v_fneg_neg0_minnum_f32_ieee(float %a) #0 {
 968 ; GCN-LABEL: v_fneg_neg0_minnum_f32_ieee:
 969 ; GCN:       ; %bb.0:
 970 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 971 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
 972 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
 973 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 974   %min = call float @llvm.minnum.f32(float -0.0, float %a)
 975   %fneg = fneg float %min
 976   ret float %fneg
 977 }
 978
 979 define float @v_fneg_inv2pi_minnum_f32(float %a) #0 {
 980 ; SI-LABEL: v_fneg_inv2pi_minnum_f32:
 981 ; SI:       ; %bb.0:
 982 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 983 ; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
 984 ; SI-NEXT:    v_max_f32_e32 v0, 0xbe22f983, v0
 985 ; SI-NEXT:    s_setpc_b64 s[30:31]
 986 ;
 987 ; VI-LABEL: v_fneg_inv2pi_minnum_f32:
 988 ; VI:       ; %bb.0:
 989 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 990 ; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 991 ; VI-NEXT:    v_min_f32_e32 v0, 0.15915494, v0
 992 ; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 993 ; VI-NEXT:    s_setpc_b64 s[30:31]
 994   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
 995   %fneg = fneg float %min
 996   ret float %fneg
 997 }
 998
 999 define float @v_fneg_neg_inv2pi_minnum_f32(float %a) #0 {
1000 ; SI-LABEL: v_fneg_neg_inv2pi_minnum_f32:
1001 ; SI:       ; %bb.0:
1002 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003 ; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1004 ; SI-NEXT:    v_max_f32_e32 v0, 0x3e22f983, v0
1005 ; SI-NEXT:    s_setpc_b64 s[30:31]
1006 ;
1007 ; VI-LABEL: v_fneg_neg_inv2pi_minnum_f32:
1008 ; VI:       ; %bb.0:
1009 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010 ; VI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1011 ; VI-NEXT:    v_max_f32_e32 v0, 0.15915494, v0
1012 ; VI-NEXT:    s_setpc_b64 s[30:31]
1013   %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
1014   %fneg = fneg float %min
1015   ret float %fneg
1016 }
1017
1018 define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
1019 ; SI-LABEL: v_fneg_inv2pi_minnum_f16:
1020 ; SI:       ; %bb.0:
1021 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022 ; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1023 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1024 ; SI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1025 ; SI-NEXT:    v_max_f32_e32 v0, 0xbe230000, v0
1026 ; SI-NEXT:    s_setpc_b64 s[30:31]
1027 ;
1028 ; VI-LABEL: v_fneg_inv2pi_minnum_f16:
1029 ; VI:       ; %bb.0:
1030 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031 ; VI-NEXT:    v_max_f16_e32 v0, v0, v0
1032 ; VI-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
1033 ; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1034 ; VI-NEXT:    s_setpc_b64 s[30:31]
1035   %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
1036   %fneg = fneg half %min
1037   ret half %fneg
1038 }
1039
1040 define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
1041 ; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
1042 ; SI:       ; %bb.0:
1043 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1045 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1046 ; SI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1047 ; SI-NEXT:    v_max_f32_e32 v0, 0x3e230000, v0
1048 ; SI-NEXT:    s_setpc_b64 s[30:31]
1049 ;
1050 ; VI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
1051 ; VI:       ; %bb.0:
1052 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053 ; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1054 ; VI-NEXT:    v_max_f16_e32 v0, 0.15915494, v0
1055 ; VI-NEXT:    s_setpc_b64 s[30:31]
1056   %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
1057   %fneg = fneg half %min
1058   ret half %fneg
1059 }
1060
1061 define double @v_fneg_inv2pi_minnum_f64(double %a) #0 {
1062 ; SI-LABEL: v_fneg_inv2pi_minnum_f64:
1063 ; SI:       ; %bb.0:
1064 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1065 ; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1066 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
1067 ; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
1068 ; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
1069 ; SI-NEXT:    s_setpc_b64 s[30:31]
1070 ;
1071 ; VI-LABEL: v_fneg_inv2pi_minnum_f64:
1072 ; VI:       ; %bb.0:
1073 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; VI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1075 ; VI-NEXT:    v_min_f64 v[0:1], v[0:1], 0.15915494309189532
1076 ; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1077 ; VI-NEXT:    s_setpc_b64 s[30:31]
1078   %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
1079   %fneg = fneg double %min
1080   ret double %fneg
1081 }
1082
1083 define double @v_fneg_neg_inv2pi_minnum_f64(double %a) #0 {
1084 ; SI-LABEL: v_fneg_neg_inv2pi_minnum_f64:
1085 ; SI:       ; %bb.0:
1086 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1088 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
1089 ; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
1090 ; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
1091 ; SI-NEXT:    s_setpc_b64 s[30:31]
1092 ;
1093 ; VI-LABEL: v_fneg_neg_inv2pi_minnum_f64:
1094 ; VI:       ; %bb.0:
1095 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1096 ; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1097 ; VI-NEXT:    v_max_f64 v[0:1], v[0:1], 0.15915494309189532
1098 ; VI-NEXT:    s_setpc_b64 s[30:31]
1099   %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
1100   %fneg = fneg double %min
1101   ret double %fneg
1102 }
1103
1104 define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #4 {
1105 ; GCN-LABEL: v_fneg_neg0_minnum_f32_no_ieee:
1106 ; GCN:       ; %bb.0:
1107 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GCN-NEXT:    v_max_f32_e64 v0, -v0, 0
1109 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1110   %min = call float @llvm.minnum.f32(float -0.0, float %a)
1111   %fneg = fneg float %min
1112   ret float %fneg
1113 }
1114
1115 define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) #0 {
1116 ; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_ieee:
1117 ; GCN:       ; %bb.0:
1118 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119 ; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1120 ; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1121 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1122 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1123   %min = call float @llvm.minnum.f32(float 0.0, float %a)
1124   %fneg = fneg float %min
1125   %mul = fmul float %fneg, %b
1126   ret float %mul
1127 }
1128
1129 define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) #0 {
1130 ; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32:
1131 ; SI:       ; %bb.0:
1132 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133 ; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1134 ; SI-NEXT:    v_max_f32_e32 v0, 0xbe22f983, v0
1135 ; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
1136 ; SI-NEXT:    s_setpc_b64 s[30:31]
1137 ;
1138 ; VI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32:
1139 ; VI:       ; %bb.0:
1140 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1141 ; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1142 ; VI-NEXT:    v_min_f32_e32 v0, 0.15915494, v0
1143 ; VI-NEXT:    v_mul_f32_e64 v0, -v0, v1
1144 ; VI-NEXT:    s_setpc_b64 s[30:31]
1145   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
1146   %fneg = fneg float %min
1147   %mul = fmul float %fneg, %b
1148   ret float %mul
1149 }
1150
1151 define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
1152 ; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_no_ieee:
1153 ; GCN:       ; %bb.0:
1154 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155 ; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1156 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1157 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1158   %min = call float @llvm.minnum.f32(float 0.0, float %a)
1159   %fneg = fneg float %min
1160   %mul = fmul float %fneg, %b
1161   ret float %mul
1162 }
1163
1164 define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float %b) #0 {
1165 ; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_ieee:
1166 ; GCN:       ; %bb.0:
1167 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1169 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1170 ; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
1171 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1172 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1173   %min = call float @llvm.minnum.f32(float %a, float %b)
1174   %fneg = fneg float %min
1175   %use1 = fmul float %min, 4.0
1176   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1177   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1178   ret { float, float } %insert.1
1179 }
1180
1181 define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #4 {
1182 ; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_no_ieee:
1183 ; GCN:       ; %bb.0:
1184 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
1186 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1187 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1188   %min = call float @llvm.minnum.f32(float %a, float %b)
1189   %fneg = fneg float %min
1190   %use1 = fmul float %min, 4.0
1191   %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1192   %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1193   ret <2 x float> %ins1
1194 }
1195
1196 ; --------------------------------------------------------------------------------
1197 ; fmaxnum tests
1198 ; --------------------------------------------------------------------------------
1199
1200 define float @v_fneg_maxnum_f32_ieee(float %a, float %b) #0 {
1201 ; GCN-LABEL: v_fneg_maxnum_f32_ieee:
1202 ; GCN:       ; %bb.0:
1203 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1205 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1206 ; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
1207 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1208   %max = call float @llvm.maxnum.f32(float %a, float %b)
1209   %fneg = fneg float %max
1210   ret float %fneg
1211 }
1212
1213 define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #4 {
1214 ; GCN-LABEL: v_fneg_maxnum_f32_no_ieee:
1215 ; GCN:       ; %bb.0:
1216 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1217 ; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
1218 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1219   %max = call float @llvm.maxnum.f32(float %a, float %b)
1220   %fneg = fneg float %max
1221   ret float %fneg
1222 }
1223
1224 define float @v_fneg_self_maxnum_f32_ieee(float %a) #0 {
1225 ; GCN-LABEL: v_fneg_self_maxnum_f32_ieee:
1226 ; GCN:       ; %bb.0:
1227 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1229 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1230   %max = call float @llvm.maxnum.f32(float %a, float %a)
1231   %max.fneg = fneg float %max
1232   ret float %max.fneg
1233 }
1234
1235 define float @v_fneg_self_maxnum_f32_no_ieee(float %a) #4 {
1236 ; GCN-LABEL: v_fneg_self_maxnum_f32_no_ieee:
1237 ; GCN:       ; %bb.0:
1238 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1240 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1241   %max = call float @llvm.maxnum.f32(float %a, float %a)
1242   %max.fneg = fneg float %max
1243   ret float %max.fneg
1244 }
1245
1246 define float @v_fneg_posk_maxnum_f32_ieee(float %a) #0 {
1247 ; GCN-LABEL: v_fneg_posk_maxnum_f32_ieee:
1248 ; GCN:       ; %bb.0:
1249 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1251 ; GCN-NEXT:    v_min_f32_e32 v0, -4.0, v0
1252 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1253   %max = call float @llvm.maxnum.f32(float 4.0, float %a)
1254   %fneg = fneg float %max
1255   ret float %fneg
1256 }
1257
1258 define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #4 {
1259 ; GCN-LABEL: v_fneg_posk_maxnum_f32_no_ieee:
1260 ; GCN:       ; %bb.0:
1261 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; GCN-NEXT:    v_min_f32_e64 v0, -v0, -4.0
1263 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1264   %max = call float @llvm.maxnum.f32(float 4.0, float %a)
1265   %fneg = fneg float %max
1266   ret float %fneg
1267 }
1268
1269 define float @v_fneg_negk_maxnum_f32_ieee(float %a) #0 {
1270 ; GCN-LABEL: v_fneg_negk_maxnum_f32_ieee:
1271 ; GCN:       ; %bb.0:
1272 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1274 ; GCN-NEXT:    v_min_f32_e32 v0, 4.0, v0
1275 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1276   %max = call float @llvm.maxnum.f32(float -4.0, float %a)
1277   %fneg = fneg float %max
1278   ret float %fneg
1279 }
1280
1281 define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #4 {
1282 ; GCN-LABEL: v_fneg_negk_maxnum_f32_no_ieee:
1283 ; GCN:       ; %bb.0:
1284 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GCN-NEXT:    v_min_f32_e64 v0, -v0, 4.0
1286 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1287   %max = call float @llvm.maxnum.f32(float -4.0, float %a)
1288   %fneg = fneg float %max
1289   ret float %fneg
1290 }
1291
1292 define float @v_fneg_0_maxnum_f32(float %a) #0 {
1293 ; GCN-LABEL: v_fneg_0_maxnum_f32:
1294 ; GCN:       ; %bb.0:
1295 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1297 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1298 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1299   %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a)
1300   %fneg = fneg float %max
1301   ret float %fneg
1302 }
1303
1304 define float @v_fneg_neg0_maxnum_f32_ieee(float %a) #0 {
1305 ; GCN-LABEL: v_fneg_neg0_maxnum_f32_ieee:
1306 ; GCN:       ; %bb.0:
1307 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1309 ; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1310 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1311   %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1312   %fneg = fneg float %max
1313   ret float %fneg
1314 }
1315
1316 define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #4 {
1317 ; GCN-LABEL: v_fneg_neg0_maxnum_f32_no_ieee:
1318 ; GCN:       ; %bb.0:
1319 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320 ; GCN-NEXT:    v_min_f32_e64 v0, -v0, 0
1321 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1322   %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1323   %fneg = fneg float %max
1324   ret float %fneg
1325 }
1326
1327 define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) #0 {
1328 ; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_ieee:
1329 ; GCN:       ; %bb.0:
1330 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331 ; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1332 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1333 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1334 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1335   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1336   %fneg = fneg float %max
1337   %mul = fmul float %fneg, %b
1338   ret float %mul
1339 }
1340
1341 define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
1342 ; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1343 ; GCN:       ; %bb.0:
1344 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1346 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1347 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1348   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1349   %fneg = fneg float %max
1350   %mul = fmul float %fneg, %b
1351   ret float %mul
1352 }
1353
1354 define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float %b) #0 {
1355 ; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1356 ; GCN:       ; %bb.0:
1357 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1358 ; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1359 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1360 ; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
1361 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1362 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1363   %max = call float @llvm.maxnum.f32(float %a, float %b)
1364   %fneg = fneg float %max
1365   %use1 = fmul float %max, 4.0
1366   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1367   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1368   ret { float, float } %insert.1
1369 }
1370
1371 define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #4 {
1372 ; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1373 ; GCN:       ; %bb.0:
1374 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375 ; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
1376 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1377 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1378   %max = call float @llvm.maxnum.f32(float %a, float %b)
1379   %fneg = fneg float %max
1380   %use1 = fmul float %max, 4.0
1381   %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
1382   %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1383   ret <2 x float> %ins1
1384 }
1385
1386 ; --------------------------------------------------------------------------------
1387 ; fma tests
1388 ; --------------------------------------------------------------------------------
1389
1390 define float @v_fneg_fma_f32(float %a, float %b, float %c) #0 {
1391 ; GCN-SAFE-LABEL: v_fneg_fma_f32:
1392 ; GCN-SAFE:       ; %bb.0:
1393 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394 ; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
1395 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1396 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1397 ;
1398 ; GCN-NSZ-LABEL: v_fneg_fma_f32:
1399 ; GCN-NSZ:       ; %bb.0:
1400 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1402 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1403   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1404   %fneg = fneg float %fma
1405   ret float %fneg
1406 }
1407
1408 define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float %c) #0 {
1409 ; GCN-LABEL: v_fneg_fma_store_use_fma_f32:
1410 ; GCN:       ; %bb.0:
1411 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1412 ; GCN-NEXT:    v_fma_f32 v1, v0, v1, v2
1413 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1414 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1415   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1416   %fneg = fneg float %fma
1417   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1418   %insert.1 = insertvalue { float, float } %insert.0, float %fma, 1
1419   ret { float, float } %insert.1
1420 }
1421
1422 define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float %c) #0 {
1423 ; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fma_f32:
1424 ; GCN-SAFE:       ; %bb.0:
1425 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426 ; GCN-SAFE-NEXT:    v_fma_f32 v1, v0, v1, v2
1427 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1428 ; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
1429 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1430 ;
1431 ; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fma_f32:
1432 ; GCN-NSZ:       ; %bb.0:
1433 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1434 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1435 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1436 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1437   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1438   %fneg = fneg float %fma
1439   %use1 = fmul float %fma, 4.0
1440   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1441   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1442   ret { float, float } %insert.1
1443 }
1444
1445 define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) #0 {
1446 ; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_y_f32:
1447 ; GCN-SAFE:       ; %bb.0:
1448 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449 ; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, v2
1450 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1451 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1452 ;
1453 ; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_y_f32:
1454 ; GCN-NSZ:       ; %bb.0:
1455 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1457 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1458   %fneg.a = fneg float %a
1459   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1460   %fneg = fneg float %fma
1461   ret float %fneg
1462 }
1463
1464 define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) #0 {
1465 ; GCN-SAFE-LABEL: v_fneg_fma_x_fneg_y_f32:
1466 ; GCN-SAFE:       ; %bb.0:
1467 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1468 ; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, -v1, v2
1469 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1470 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1471 ;
1472 ; GCN-NSZ-LABEL: v_fneg_fma_x_fneg_y_f32:
1473 ; GCN-NSZ:       ; %bb.0:
1474 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1475 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1476 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1477   %fneg.b = fneg float %b
1478   %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1479   %fneg = fneg float %fma
1480   ret float %fneg
1481 }
1482
1483 define float @v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) #0 {
1484 ; GCN-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f32:
1485 ; GCN-SAFE:       ; %bb.0:
1486 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1487 ; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
1488 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1489 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1490 ;
1491 ; GCN-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f32:
1492 ; GCN-NSZ:       ; %bb.0:
1493 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1495 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1496   %fneg.a = fneg float %a
1497   %fneg.b = fneg float %b
1498   %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1499   %fneg = fneg float %fma
1500   ret float %fneg
1501 }
1502
1503 define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) #0 {
1504 ; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f32:
1505 ; GCN-SAFE:       ; %bb.0:
1506 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507 ; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, -v2
1508 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1509 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1510 ;
1511 ; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f32:
1512 ; GCN-NSZ:       ; %bb.0:
1513 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1514 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, v2
1515 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1516   %fneg.a = fneg float %a
1517   %fneg.c = fneg float %c
1518   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1519   %fneg = fneg float %fma
1520   ret float %fneg
1521 }
1522
1523 define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) #0 {
1524 ; GCN-SAFE-LABEL: v_fneg_fma_x_y_fneg_f32:
1525 ; GCN-SAFE:       ; %bb.0:
1526 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1527 ; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, -v2
1528 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1529 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1530 ;
1531 ; GCN-NSZ-LABEL: v_fneg_fma_x_y_fneg_f32:
1532 ; GCN-NSZ:       ; %bb.0:
1533 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, v2
1535 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1536   %fneg.c = fneg float %c
1537   %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1538   %fneg = fneg float %fma
1539   ret float %fneg
1540 }
1541
1542 define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, float %c) #0 {
1543 ; GCN-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f32:
1544 ; GCN-SAFE:       ; %bb.0:
1545 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1546 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v0
1547 ; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, v2
1548 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1549 ; GCN-SAFE-NEXT:    v_mov_b32_e32 v1, v3
1550 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1551 ;
1552 ; GCN-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f32:
1553 ; GCN-NSZ:       ; %bb.0:
1554 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GCN-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v0
1556 ; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1557 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v3
1558 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1559   %fneg.a = fneg float %a
1560   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1561   %fneg = fneg float %fma
1562   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1563   %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
1564   ret { float, float } %insert.1
1565 }
1566
1567 define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, float %c, float %d) #0 {
1568 ; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32:
1569 ; GCN-SAFE:       ; %bb.0:
1570 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571 ; GCN-SAFE-NEXT:    v_fma_f32 v1, -v0, v1, v2
1572 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
1573 ; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v0, v3
1574 ; GCN-SAFE-NEXT:    v_mov_b32_e32 v0, v2
1575 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1576 ;
1577 ; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32:
1578 ; GCN-NSZ:       ; %bb.0:
1579 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580 ; GCN-NSZ-NEXT:    v_fma_f32 v2, v0, v1, -v2
1581 ; GCN-NSZ-NEXT:    v_mul_f32_e64 v1, -v0, v3
1582 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v2
1583 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1584   %fneg.a = fneg float %a
1585   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1586   %fneg = fneg float %fma
1587   %use1 = fmul float %fneg.a, %d
1588   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1589   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1590   ret { float, float } %insert.1
1591 }
1592
1593 ; --------------------------------------------------------------------------------
1594 ; fmad tests
1595 ; --------------------------------------------------------------------------------
1596
1597 define float @v_fneg_fmad_f32(float %a, float %b, float %c) #0 {
1598 ; GCN-SAFE-LABEL: v_fneg_fmad_f32:
1599 ; GCN-SAFE:       ; %bb.0:
1600 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
1602 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
1603 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1604 ;
1605 ; GCN-NSZ-LABEL: v_fneg_fmad_f32:
1606 ; GCN-NSZ:       ; %bb.0:
1607 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608 ; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v1, -v2
1609 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1610   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1611   %fneg = fneg float %fma
1612   ret float %fneg
1613 }
1614
1615 define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
1616 ; GCN-SAFE-LABEL: v_fneg_fmad_v4f32:
1617 ; GCN-SAFE:       ; %bb.0:
1618 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v11, v3, v7
1620 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v10, v2, v6
1621 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v9, v1, v5
1622 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v8, v0, v4
1623 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v8
1624 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v9
1625 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v10
1626 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v11
1627 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1628 ;
1629 ; GCN-NSZ-LABEL: v_fneg_fmad_v4f32:
1630 ; GCN-NSZ:       ; %bb.0:
1631 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v4, -v8
1633 ; GCN-NSZ-NEXT:    v_mad_f32 v1, v1, -v5, -v9
1634 ; GCN-NSZ-NEXT:    v_mad_f32 v2, v2, -v6, -v10
1635 ; GCN-NSZ-NEXT:    v_mad_f32 v3, v3, -v7, -v11
1636 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1637   %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
1638   %fneg = fneg <4 x float> %fma
1639   ret <4 x float> %fneg
1640 }
1641
1642 define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, float %c) #0 {
1643 ; GCN-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f32:
1644 ; GCN-SAFE:       ; %bb.0:
1645 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646 ; GCN-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
1647 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
1648 ; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v2
1649 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1650 ;
1651 ; GCN-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f32:
1652 ; GCN-NSZ:       ; %bb.0:
1653 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1654 ; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v1, -v2
1655 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1656 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1657   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1658   %fneg = fneg float %fma
1659   %use1 = fmul float %fma, 4.0
1660   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1661   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1662   ret { float, float } %insert.1
1663 }
1664
1665 ; --------------------------------------------------------------------------------
1666 ; fp_extend tests
1667 ; --------------------------------------------------------------------------------
1668
1669 define double @v_fneg_fp_extend_f32_to_f64(float %a) #0 {
1670 ; GCN-LABEL: v_fneg_fp_extend_f32_to_f64:
1671 ; GCN:       ; %bb.0:
1672 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; GCN-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
1674 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1675   %fpext = fpext float %a to double
1676   %fneg = fneg double %fpext
1677   ret double %fneg
1678 }
1679
1680 define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) #0 {
1681 ; GCN-LABEL: v_fneg_fp_extend_fneg_f32_to_f64:
1682 ; GCN:       ; %bb.0:
1683 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1684 ; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1685 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1686   %fneg.a = fneg float %a
1687   %fpext = fpext float %fneg.a to double
1688   %fneg = fneg double %fpext
1689   ret double %fneg
1690 }
1691
1692 define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) #0 {
1693 ; GCN-LABEL: v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1694 ; GCN:       ; %bb.0:
1695 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696 ; GCN-NEXT:    v_mov_b32_e32 v2, v0
1697 ; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
1698 ; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
1699 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1700   %fneg.a = fneg float %a
1701   %fpext = fpext float %fneg.a to double
1702   %fneg = fneg double %fpext
1703   %insert.0 = insertvalue { double, float } poison, double %fneg, 0
1704   %insert.1 = insertvalue { double, float } %insert.0, float %fneg.a, 1
1705   ret { double, float } %insert.1
1706 }
1707
1708 define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
1709 ; GCN-LABEL: v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1710 ; GCN:       ; %bb.0:
1711 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712 ; GCN-NEXT:    v_cvt_f64_f32_e32 v[2:3], v0
1713 ; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
1714 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
1715 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1716   %fpext = fpext float %a to double
1717   %fneg = fneg double %fpext
1718   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
1719   %insert.1 = insertvalue { double, double } %insert.0, double %fpext, 1
1720   ret { double, double } %insert.1
1721 }
1722
1723 define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
1724 ; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1725 ; SI:       ; %bb.0:
1726 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727 ; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1728 ; SI-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
1729 ; SI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
1730 ; SI-NEXT:    v_mov_b32_e32 v1, v4
1731 ; SI-NEXT:    s_setpc_b64 s[30:31]
1732 ;
1733 ; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1734 ; VI:       ; %bb.0:
1735 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1737 ; VI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
1738 ; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1739 ; VI-NEXT:    s_setpc_b64 s[30:31]
1740   %fpext = fpext float %a to double
1741   %fneg = fneg double %fpext
1742   %mul = fmul double %fpext, 4.0
1743   %insert.0 = insertvalue { double, double } poison, double %fneg, 0
1744   %insert.1 = insertvalue { double, double } %insert.0, double %mul, 1
1745   ret { double, double } %insert.1
1746 }
1747
1748 ; FIXME: Source modifiers not folded for f16->f32
1749 define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
1750 ; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1751 ; SI:       ; %bb.0:
1752 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753 ; SI-NEXT:    v_mov_b32_e32 v1, v0
1754 ; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1755 ; SI-NEXT:    s_setpc_b64 s[30:31]
1756 ;
1757 ; VI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1758 ; VI:       ; %bb.0:
1759 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; VI-NEXT:    v_cvt_f32_f16_e32 v1, v0
1761 ; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1762 ; VI-NEXT:    s_setpc_b64 s[30:31]
1763   %fpext = fpext half %a to float
1764   %fneg = fneg float %fpext
1765   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1766   %insert.1 = insertvalue { float, float } %insert.0, float %fpext, 1
1767   ret { float, float } %insert.1
1768 }
1769
1770 define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
1771 ; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1772 ; SI:       ; %bb.0:
1773 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774 ; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
1775 ; SI-NEXT:    v_mul_f32_e32 v1, 4.0, v0
1776 ; SI-NEXT:    v_mov_b32_e32 v0, v2
1777 ; SI-NEXT:    s_setpc_b64 s[30:31]
1778 ;
1779 ; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1780 ; VI:       ; %bb.0:
1781 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1782 ; VI-NEXT:    v_cvt_f32_f16_e32 v1, v0
1783 ; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1784 ; VI-NEXT:    v_mul_f32_e32 v1, 4.0, v1
1785 ; VI-NEXT:    s_setpc_b64 s[30:31]
1786   %fpext = fpext half %a to float
1787   %fneg = fneg float %fpext
1788   %mul = fmul float %fpext, 4.0
1789   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1790   %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
1791   ret { float, float } %insert.1
1792 }
1793
1794 ; --------------------------------------------------------------------------------
1795 ; fp_round tests
1796 ; --------------------------------------------------------------------------------
1797
1798 define float @v_fneg_fp_round_f64_to_f32(double %a) #0 {
1799 ; GCN-LABEL: v_fneg_fp_round_f64_to_f32:
1800 ; GCN:       ; %bb.0:
1801 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1802 ; GCN-NEXT:    v_cvt_f32_f64_e64 v0, -v[0:1]
1803 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1804   %fpround = fptrunc double %a to float
1805   %fneg = fneg float %fpround
1806   ret float %fneg
1807 }
1808
1809 define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) #0 {
1810 ; GCN-LABEL: v_fneg_fp_round_fneg_f64_to_f32:
1811 ; GCN:       ; %bb.0:
1812 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1813 ; GCN-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
1814 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1815   %fneg.a = fneg double %a
1816   %fpround = fptrunc double %fneg.a to float
1817   %fneg = fneg float %fpround
1818   ret float %fneg
1819 }
1820
1821 define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) #0 {
1822 ; GCN-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f32:
1823 ; GCN:       ; %bb.0:
1824 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1825 ; GCN-NEXT:    v_cvt_f32_f64_e32 v3, v[0:1]
1826 ; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
1827 ; GCN-NEXT:    v_mov_b32_e32 v1, v0
1828 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
1829 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1830   %fneg.a = fneg double %a
1831   %fpround = fptrunc double %fneg.a to float
1832   %fneg = fneg float %fpround
1833   %insert.0 = insertvalue { float, double } poison, float %fneg, 0
1834   %insert.1 = insertvalue { float, double } %insert.0, double %fneg.a, 1
1835   ret { float, double } %insert.1
1836 }
1837
1838 define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, double %c) #0 {
1839 ; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1840 ; SI:       ; %bb.0:
1841 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1842 ; SI-NEXT:    v_cvt_f32_f64_e32 v4, v[0:1]
1843 ; SI-NEXT:    v_mul_f64 v[1:2], -v[0:1], v[2:3]
1844 ; SI-NEXT:    v_mov_b32_e32 v0, v4
1845 ; SI-NEXT:    s_setpc_b64 s[30:31]
1846 ;
1847 ; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1848 ; VI:       ; %bb.0:
1849 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850 ; VI-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
1851 ; VI-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
1852 ; VI-NEXT:    v_mov_b32_e32 v1, v2
1853 ; VI-NEXT:    v_mov_b32_e32 v2, v3
1854 ; VI-NEXT:    s_setpc_b64 s[30:31]
1855   %fneg.a = fneg double %a
1856   %fpround = fptrunc double %fneg.a to float
1857   %fneg = fneg float %fpround
1858   %use1 = fmul double %fneg.a, %c
1859   %insert.0 = insertvalue { float, double } poison, float %fneg, 0
1860   %insert.1 = insertvalue { float, double } %insert.0, double %use1, 1
1861   ret { float, double } %insert.1
1862 }
1863
1864 define half @v_fneg_fp_round_f32_to_f16(float %a) #0 {
1865 ; SI-LABEL: v_fneg_fp_round_f32_to_f16:
1866 ; SI:       ; %bb.0:
1867 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868 ; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1869 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1870 ; SI-NEXT:    s_setpc_b64 s[30:31]
1871 ;
1872 ; VI-LABEL: v_fneg_fp_round_f32_to_f16:
1873 ; VI:       ; %bb.0:
1874 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1875 ; VI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1876 ; VI-NEXT:    s_setpc_b64 s[30:31]
1877   %fpround = fptrunc float %a to half
1878   %fneg = fneg half %fpround
1879   ret half %fneg
1880 }
1881
1882 define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) #0 {
1883 ; SI-LABEL: v_fneg_fp_round_fneg_f32_to_f16:
1884 ; SI:       ; %bb.0:
1885 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1886 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1887 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1888 ; SI-NEXT:    s_setpc_b64 s[30:31]
1889 ;
1890 ; VI-LABEL: v_fneg_fp_round_fneg_f32_to_f16:
1891 ; VI:       ; %bb.0:
1892 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1893 ; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1894 ; VI-NEXT:    s_setpc_b64 s[30:31]
1895   %fneg.a = fneg float %a
1896   %fpround = fptrunc float %fneg.a to half
1897   %fneg = fneg half %fpround
1898   ret half %fneg
1899 }
1900
1901 define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) #0 {
1902 ; GCN-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1903 ; GCN:       ; %bb.0:
1904 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905 ; GCN-NEXT:    v_cvt_f32_f64_e32 v1, v[0:1]
1906 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1907 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1908   %fpround = fptrunc double %a to float
1909   %fneg = fneg float %fpround
1910   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1911   %insert.1 = insertvalue { float, float } %insert.0, float %fpround, 1
1912   ret { float, float } %insert.1
1913 }
1914
1915 define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) #0 {
1916 ; SI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16:
1917 ; SI:       ; %bb.0:
1918 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1919 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v0
1920 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v1
1921 ; SI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
1922 ; SI-NEXT:    v_mov_b32_e32 v0, v2
1923 ; SI-NEXT:    s_setpc_b64 s[30:31]
1924 ;
1925 ; VI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16:
1926 ; VI:       ; %bb.0:
1927 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1928 ; VI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1929 ; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
1930 ; VI-NEXT:    v_mov_b32_e32 v0, v2
1931 ; VI-NEXT:    s_setpc_b64 s[30:31]
1932   %fneg.a = fneg float %a
1933   %fpround = fptrunc float %fneg.a to half
1934   %fneg = fneg half %fpround
1935   %insert.0 = insertvalue { half, float } poison, half %fneg, 0
1936   %insert.1 = insertvalue { half, float } %insert.0, float %fneg.a, 1
1937   ret { half, float } %insert.1
1938 }
1939
1940 define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, float %c) #0 {
1941 ; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1942 ; SI:       ; %bb.0:
1943 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1945 ; SI-NEXT:    v_mul_f32_e64 v1, -v0, v1
1946 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1947 ; SI-NEXT:    v_mov_b32_e32 v0, v2
1948 ; SI-NEXT:    s_setpc_b64 s[30:31]
1949 ;
1950 ; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1951 ; VI:       ; %bb.0:
1952 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953 ; VI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1954 ; VI-NEXT:    v_mul_f32_e64 v1, -v0, v1
1955 ; VI-NEXT:    v_mov_b32_e32 v0, v2
1956 ; VI-NEXT:    s_setpc_b64 s[30:31]
1957   %fneg.a = fneg float %a
1958   %fpround = fptrunc float %fneg.a to half
1959   %fneg = fneg half %fpround
1960   %use1 = fmul float %fneg.a, %c
1961   %insert.0 = insertvalue { half, float } poison, half %fneg, 0
1962   %insert.1 = insertvalue { half, float } %insert.0, float %use1, 1
1963   ret { half, float } %insert.1
1964 }
1965
1966 ; --------------------------------------------------------------------------------
1967 ; rcp tests
1968 ; --------------------------------------------------------------------------------
1969
1970 define float @v_fneg_rcp_f32(float %a) #0 {
1971 ; GCN-LABEL: v_fneg_rcp_f32:
1972 ; GCN:       ; %bb.0:
1973 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974 ; GCN-NEXT:    v_rcp_f32_e64 v0, -v0
1975 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1976   %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1977   %fneg = fneg float %rcp
1978   ret float %fneg
1979 }
1980
1981 define float @v_fneg_rcp_fneg_f32(float %a) #0 {
1982 ; GCN-LABEL: v_fneg_rcp_fneg_f32:
1983 ; GCN:       ; %bb.0:
1984 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1985 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
1986 ; GCN-NEXT:    s_setpc_b64 s[30:31]
1987   %fneg.a = fneg float %a
1988   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1989   %fneg = fneg float %rcp
1990   ret float %fneg
1991 }
1992
1993 define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 {
1994 ; GCN-LABEL: v_fneg_rcp_store_use_fneg_f32:
1995 ; GCN:       ; %bb.0:
1996 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997 ; GCN-NEXT:    v_rcp_f32_e32 v5, v0
1998 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1999 ; GCN-NEXT:    flat_store_dword v[1:2], v5
2000 ; GCN-NEXT:    flat_store_dword v[3:4], v0
2001 ; GCN-NEXT:    s_waitcnt vmcnt(0)
2002 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2003   %fneg.a = fneg float %a
2004   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
2005   %fneg = fneg float %rcp
2006   store float %fneg, ptr addrspace(1) %ptr0
2007   store float %fneg.a, ptr addrspace(1) %ptr1
2008   ret void
2009 }
2010
2011 define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) #0 {
2012 ; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f32:
2013 ; GCN:       ; %bb.0:
2014 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015 ; GCN-NEXT:    v_rcp_f32_e32 v2, v0
2016 ; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v1
2017 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
2018 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2019   %fneg.a = fneg float %a
2020   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
2021   %fneg = fneg float %rcp
2022   %use1 = fmul float %fneg.a, %c
2023   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2024   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2025   ret { float, float } %insert.1
2026 }
2027
2028 ; Test getNegatedExpression works for rcp nodes
2029 define float @v_negated_rcp_f32(float %arg0, float %arg1) #1 {
2030 ; GCN-LABEL: v_negated_rcp_f32:
2031 ; GCN:       ; %bb.0:
2032 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033 ; GCN-NEXT:    v_fma_f32 v0, v0, v1, 2.0
2034 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
2035 ; GCN-NEXT:    v_add_f32_e32 v0, v1, v0
2036 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2037   %neg.arg0 = fneg float %arg0
2038   %fma = call nsz float @llvm.fma.f32(float %neg.arg0, float %arg1, float -2.0)
2039   %rcp0 = call float @llvm.amdgcn.rcp.f32(float %fma)
2040   %mul = fsub nsz float %arg1, %rcp0
2041   ret float %mul
2042 }
2043
2044 ; --------------------------------------------------------------------------------
2045 ; fmul_legacy tests
2046 ; --------------------------------------------------------------------------------
2047
2048 define float @v_fneg_mul_legacy_f32(float %a, float %b) #0 {
2049 ; GCN-LABEL: v_fneg_mul_legacy_f32:
2050 ; GCN:       ; %bb.0:
2051 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2053 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2054   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2055   %fneg = fneg float %mul
2056   ret float %fneg
2057 }
2058
2059 define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, float %b) #0 {
2060 ; GCN-LABEL: v_fneg_mul_legacy_store_use_mul_legacy_f32:
2061 ; GCN:       ; %bb.0:
2062 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; GCN-NEXT:    v_mul_legacy_f32_e32 v1, v0, v1
2064 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2065 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2066   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2067   %fneg = fneg float %mul
2068   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2069   %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
2070   ret { float, float } %insert.1
2071 }
2072
2073 define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, float %b) #0 {
2074 ; GCN-LABEL: v_fneg_mul_legacy_multi_use_mul_legacy_f32:
2075 ; GCN:       ; %bb.0:
2076 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2077 ; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2078 ; GCN-NEXT:    v_mul_legacy_f32_e64 v1, -v0, 4.0
2079 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2080   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2081   %fneg = fneg float %mul
2082   %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
2083   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2084   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2085   ret { float, float } %insert.1
2086 }
2087
2088 define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) #0 {
2089 ; GCN-LABEL: v_fneg_mul_legacy_fneg_x_f32:
2090 ; GCN:       ; %bb.0:
2091 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2092 ; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2093 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2094   %fneg.a = fneg float %a
2095   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2096   %fneg = fneg float %mul
2097   ret float %fneg
2098 }
2099
2100 define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) #0 {
2101 ; GCN-LABEL: v_fneg_mul_legacy_x_fneg_f32:
2102 ; GCN:       ; %bb.0:
2103 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2104 ; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2105 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2106   %fneg.b = fneg float %b
2107   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
2108   %fneg = fneg float %mul
2109   ret float %fneg
2110 }
2111
2112 define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) #0 {
2113 ; GCN-LABEL: v_fneg_mul_legacy_fneg_fneg_f32:
2114 ; GCN:       ; %bb.0:
2115 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2116 ; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2117 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2118   %fneg.a = fneg float %a
2119   %fneg.b = fneg float %b
2120   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
2121   %fneg = fneg float %mul
2122   ret float %fneg
2123 }
2124
2125 define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float %b) #0 {
2126 ; GCN-LABEL: v_fneg_mul_legacy_store_use_fneg_x_f32:
2127 ; GCN:       ; %bb.0:
2128 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2129 ; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
2130 ; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2131 ; GCN-NEXT:    v_mov_b32_e32 v1, v2
2132 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2133   %fneg.a = fneg float %a
2134   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2135   %fneg = fneg float %mul
2136
2137   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2138   %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
2139   ret { float, float } %insert.1
2140 }
2141
2142 define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
2143 ; GCN-LABEL: v_fneg_mul_legacy_multi_use_fneg_x_f32:
2144 ; GCN:       ; %bb.0:
2145 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2146 ; GCN-NEXT:    v_mul_legacy_f32_e32 v3, v0, v1
2147 ; GCN-NEXT:    v_mul_legacy_f32_e64 v1, -v0, v2
2148 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
2149 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2150   %fneg.a = fneg float %a
2151   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2152   %fneg = fneg float %mul
2153   %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
2154   %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2155   %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2156   ret { float, float } %insert.1
2157 }
2158
2159 ; --------------------------------------------------------------------------------
2160 ; sin tests
2161 ; --------------------------------------------------------------------------------
2162
2163 define float @v_fneg_sin_f32(float %a) #0 {
2164 ; GCN-LABEL: v_fneg_sin_f32:
2165 ; GCN:       ; %bb.0:
2166 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2167 ; GCN-NEXT:    v_mul_f32_e32 v0, 0xbe22f983, v0
2168 ; GCN-NEXT:    v_fract_f32_e32 v0, v0
2169 ; GCN-NEXT:    v_sin_f32_e32 v0, v0
2170 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2171   %sin = call float @llvm.sin.f32(float %a)
2172   %fneg = fneg float %sin
2173   ret float %fneg
2174 }
2175
2176 define float @v_fneg_amdgcn_sin_f32(float %a) #0 {
2177 ; GCN-LABEL: v_fneg_amdgcn_sin_f32:
2178 ; GCN:       ; %bb.0:
2179 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2180 ; GCN-NEXT:    v_sin_f32_e64 v0, -v0
2181 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2182   %sin = call float @llvm.amdgcn.sin.f32(float %a)
2183   %fneg = fneg float %sin
2184   ret float %fneg
2185 }
2186
2187 ; --------------------------------------------------------------------------------
2188 ; ftrunc tests
2189 ; --------------------------------------------------------------------------------
2190
2191 define float @v_fneg_trunc_f32(float %a) #0 {
2192 ; GCN-LABEL: v_fneg_trunc_f32:
2193 ; GCN:       ; %bb.0:
2194 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195 ; GCN-NEXT:    v_trunc_f32_e64 v0, -v0
2196 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2197   %trunc = call float @llvm.trunc.f32(float %a)
2198   %fneg = fneg float %trunc
2199   ret float %fneg
2200 }
2201
2202 ; --------------------------------------------------------------------------------
2203 ; fround tests
2204 ; --------------------------------------------------------------------------------
2205
2206 define float @v_fneg_round_f32(float %a) #0 {
2207 ; GCN-SAFE-LABEL: v_fneg_round_f32:
2208 ; GCN-SAFE:       ; %bb.0:
2209 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2210 ; GCN-SAFE-NEXT:    v_trunc_f32_e32 v1, v0
2211 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v2, v0, v1
2212 ; GCN-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
2213 ; GCN-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
2214 ; GCN-SAFE-NEXT:    s_brev_b32 s4, -2
2215 ; GCN-SAFE-NEXT:    v_bfi_b32 v0, s4, v2, v0
2216 ; GCN-SAFE-NEXT:    v_add_f32_e32 v0, v1, v0
2217 ; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2218 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
2219 ;
2220 ; GCN-NSZ-LABEL: v_fneg_round_f32:
2221 ; GCN-NSZ:       ; %bb.0:
2222 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223 ; GCN-NSZ-NEXT:    v_trunc_f32_e32 v1, v0
2224 ; GCN-NSZ-NEXT:    v_sub_f32_e32 v2, v0, v1
2225 ; GCN-NSZ-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
2226 ; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
2227 ; GCN-NSZ-NEXT:    s_brev_b32 s4, -2
2228 ; GCN-NSZ-NEXT:    v_bfi_b32 v0, s4, v2, v0
2229 ; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v1, v0
2230 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
2231   %round = call float @llvm.round.f32(float %a)
2232   %fneg = fneg float %round
2233   ret float %fneg
2234 }
2235
2236 ; --------------------------------------------------------------------------------
2237 ; rint tests
2238 ; --------------------------------------------------------------------------------
2239
2240 define float @v_fneg_rint_f32(float %a) #0 {
2241 ; GCN-LABEL: v_fneg_rint_f32:
2242 ; GCN:       ; %bb.0:
2243 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GCN-NEXT:    v_rndne_f32_e64 v0, -v0
2245 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2246   %rint = call float @llvm.rint.f32(float %a)
2247   %fneg = fneg float %rint
2248   ret float %fneg
2249 }
2250
2251 ; --------------------------------------------------------------------------------
2252 ; nearbyint tests
2253 ; --------------------------------------------------------------------------------
2254
2255 define float @v_fneg_nearbyint_f32(float %a) #0 {
2256 ; GCN-LABEL: v_fneg_nearbyint_f32:
2257 ; GCN:       ; %bb.0:
2258 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259 ; GCN-NEXT:    v_rndne_f32_e64 v0, -v0
2260 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2261   %nearbyint = call float @llvm.nearbyint.f32(float %a)
2262   %fneg = fneg float %nearbyint
2263   ret float %fneg
2264 }
2265
2266 ; --------------------------------------------------------------------------------
2267 ; fcanonicalize tests
2268 ; --------------------------------------------------------------------------------
2269
2270 define float @v_fneg_canonicalize_f32(float %a) #0 {
2271 ; GCN-LABEL: v_fneg_canonicalize_f32:
2272 ; GCN:       ; %bb.0:
2273 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2274 ; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
2275 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2276   %trunc = call float @llvm.canonicalize.f32(float %a)
2277   %fneg = fneg float %trunc
2278   ret float %fneg
2279 }
2280
2281 ; --------------------------------------------------------------------------------
2282 ; arithmetic.fence tests
2283 ; --------------------------------------------------------------------------------
2284
2285 define float @v_fneg_arithmetic_fence_f32(float %a) #0 {
2286 ; GCN-LABEL: v_fneg_arithmetic_fence_f32:
2287 ; GCN:       ; %bb.0:
2288 ; GCN-NEXT:    ;ARITH_FENCE
2289 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2290 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2291 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2292   %fence = call float @llvm.arithmetic.fence.f32(float %a)
2293   %fneg = fneg float %fence
2294   ret float %fneg
2295 }
2296
2297 define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) #0 {
2298 ; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f32:
2299 ; GCN:       ; %bb.0:
2300 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2301 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
2302 ; GCN-NEXT:    ;ARITH_FENCE
2303 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2304 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2305   %mul = fmul float %a, %b
2306   %fence = call float @llvm.arithmetic.fence.f32(float %mul)
2307   %fneg = fneg float %fence
2308   ret float %fneg
2309 }
2310
2311 ; --------------------------------------------------------------------------------
2312 ; vintrp tests
2313 ; --------------------------------------------------------------------------------
2314
2315 define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) #0 {
2316 ; SI-LABEL: v_fneg_interp_p1_f32:
2317 ; SI:       ; %bb.0:
2318 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2319 ; SI-NEXT:    v_mul_f32_e64 v1, v0, -v1
2320 ; SI-NEXT:    s_mov_b32 m0, 0
2321 ; SI-NEXT:    v_interp_p1_f32 v0, v1, attr0.x
2322 ; SI-NEXT:    v_interp_p1_f32 v1, v1, attr0.y
2323 ; SI-NEXT:    s_setpc_b64 s[30:31]
2324 ;
2325 ; VI-LABEL: v_fneg_interp_p1_f32:
2326 ; VI:       ; %bb.0:
2327 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328 ; VI-NEXT:    v_mul_f32_e64 v1, v0, -v1
2329 ; VI-NEXT:    s_mov_b32 m0, 0
2330 ; VI-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
2331 ; VI-NEXT:    v_interp_p1_f32_e32 v1, v1, attr0.y
2332 ; VI-NEXT:    s_setpc_b64 s[30:31]
2333   %mul = fmul float %a, %b
2334   %fneg = fneg float %mul
2335   %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2336   %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2337   %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
2338   %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
2339   ret { float, float } %insert.1
2340 }
2341
2342 define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) #0 {
2343 ; SI-LABEL: v_fneg_interp_p2_f32:
2344 ; SI:       ; %bb.0:
2345 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2346 ; SI-NEXT:    v_mul_f32_e64 v2, v0, -v1
2347 ; SI-NEXT:    v_mov_b32_e32 v1, 4.0
2348 ; SI-NEXT:    v_mov_b32_e32 v0, 4.0
2349 ; SI-NEXT:    s_mov_b32 m0, 0
2350 ; SI-NEXT:    v_interp_p2_f32 v0, v2, attr0.x
2351 ; SI-NEXT:    v_interp_p2_f32 v1, v2, attr0.y
2352 ; SI-NEXT:    s_setpc_b64 s[30:31]
2353 ;
2354 ; VI-LABEL: v_fneg_interp_p2_f32:
2355 ; VI:       ; %bb.0:
2356 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357 ; VI-NEXT:    v_mul_f32_e64 v2, v0, -v1
2358 ; VI-NEXT:    v_mov_b32_e32 v1, 4.0
2359 ; VI-NEXT:    v_mov_b32_e32 v0, 4.0
2360 ; VI-NEXT:    s_mov_b32 m0, 0
2361 ; VI-NEXT:    v_interp_p2_f32_e32 v0, v2, attr0.x
2362 ; VI-NEXT:    v_interp_p2_f32_e32 v1, v2, attr0.y
2363 ; VI-NEXT:    s_setpc_b64 s[30:31]
2364   %mul = fmul float %a, %b
2365   %fneg = fneg float %mul
2366   %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2367   %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2368   %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
2369   %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
2370   ret { float, float } %insert.1
2371 }
2372
2373 ; --------------------------------------------------------------------------------
2374 ; CopyToReg tests
2375 ; --------------------------------------------------------------------------------
2376
2377 define void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
2378 ; SI-LABEL: v_fneg_copytoreg_f32:
2379 ; SI:       ; %bb.0:
2380 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2381 ; SI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
2382 ; SI-NEXT:    v_lshlrev_b32_e32 v6, 2, v6
2383 ; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2384 ; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2385 ; SI-NEXT:    v_mul_f32_e32 v2, v2, v3
2386 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
2387 ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2388 ; SI-NEXT:    s_cbranch_execz .LBB118_2
2389 ; SI-NEXT:  ; %bb.1: ; %if
2390 ; SI-NEXT:    v_mul_f32_e64 v3, -v2, v4
2391 ; SI-NEXT:    flat_store_dword v[0:1], v3
2392 ; SI-NEXT:    s_waitcnt vmcnt(0)
2393 ; SI-NEXT:  .LBB118_2: ; %endif
2394 ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
2395 ; SI-NEXT:    flat_store_dword v[0:1], v2
2396 ; SI-NEXT:    s_waitcnt vmcnt(0)
2397 ; SI-NEXT:    s_setpc_b64 s[30:31]
2398 ;
2399 ; VI-LABEL: v_fneg_copytoreg_f32:
2400 ; VI:       ; %bb.0:
2401 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2402 ; VI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
2403 ; VI-NEXT:    v_lshlrev_b32_e32 v6, 2, v6
2404 ; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2405 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2406 ; VI-NEXT:    v_mul_f32_e32 v2, v2, v3
2407 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
2408 ; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2409 ; VI-NEXT:    s_cbranch_execz .LBB118_2
2410 ; VI-NEXT:  ; %bb.1: ; %if
2411 ; VI-NEXT:    v_mul_f32_e64 v3, -v2, v4
2412 ; VI-NEXT:    flat_store_dword v[0:1], v3
2413 ; VI-NEXT:    s_waitcnt vmcnt(0)
2414 ; VI-NEXT:  .LBB118_2: ; %endif
2415 ; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
2416 ; VI-NEXT:    flat_store_dword v[0:1], v2
2417 ; VI-NEXT:    s_waitcnt vmcnt(0)
2418 ; VI-NEXT:    s_setpc_b64 s[30:31]
2419   %tid = call i32 @llvm.amdgcn.workitem.id.x()
2420   %tid.ext = sext i32 %tid to i64
2421   %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2422   %mul = fmul float %a, %b
2423   %fneg = fneg float %mul
2424   %cmp0 = icmp eq i32 %d, 0
2425   br i1 %cmp0, label %if, label %endif
2426
2427 if:
2428   %mul1 = fmul float %fneg, %c
2429   store volatile float %mul1, ptr addrspace(1) %out.gep
2430   br label %endif
2431
2432 endif:
2433   store volatile float %mul, ptr addrspace(1) %out.gep
2434   ret void
2435 }
2436
2437 ; --------------------------------------------------------------------------------
2438 ; inlineasm tests
2439 ; --------------------------------------------------------------------------------
2440
2441 ; Can't fold into use, so should fold into source
2442 define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
2443 ; GCN-LABEL: v_fneg_inlineasm_f32:
2444 ; GCN:       ; %bb.0:
2445 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2446 ; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
2447 ; GCN-NEXT:    ;;#ASMSTART
2448 ; GCN-NEXT:    ; use v0
2449 ; GCN-NEXT:    ;;#ASMEND
2450 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2451   %mul = fmul float %a, %b
2452   %fneg = fneg float %mul
2453   call void asm sideeffect "; use $0", "v"(float %fneg) #0
2454   ret float %fneg
2455 }
2456
2457 ; --------------------------------------------------------------------------------
2458 ; inlineasm tests
2459 ; --------------------------------------------------------------------------------
2460
2461 ; Can't fold into use, so should fold into source
2462 define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
2463 ; GCN-LABEL: v_fneg_inlineasm_multi_use_src_f32:
2464 ; GCN:       ; %bb.0:
2465 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466 ; GCN-NEXT:    v_mul_f32_e32 v0, v2, v3
2467 ; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
2468 ; GCN-NEXT:    ;;#ASMSTART
2469 ; GCN-NEXT:    ; use v1
2470 ; GCN-NEXT:    ;;#ASMEND
2471 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2472   %mul = fmul float %a, %b
2473   %fneg = fneg float %mul
2474   call void asm sideeffect "; use $0", "v"(float %fneg) #0
2475   ret float %mul
2476 }
2477
2478 ; --------------------------------------------------------------------------------
2479 ; code size regression tests
2480 ; --------------------------------------------------------------------------------
2481
2482 ; There are multiple users of the fneg that must use a VOP3
2483 ; instruction, so there is no penalty
2484 define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, float %c) #0 {
2485 ; GCN-LABEL: multiuse_fneg_2_vop3_users_f32:
2486 ; GCN:       ; %bb.0:
2487 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488 ; GCN-NEXT:    v_fma_f32 v3, -v0, v1, v2
2489 ; GCN-NEXT:    v_fma_f32 v1, -v0, v2, 2.0
2490 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
2491 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2492   %fneg.a = fneg float %a
2493   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2494   %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2495   %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2496   %insert.1 = insertvalue { float, float } %insert.0, float %fma1, 1
2497   ret { float, float } %insert.1
2498 }
2499
2500 ; There are multiple users, but both require using a larger encoding
2501 ; for the modifier.
2502 define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, float %c) #0 {
2503 ; GCN-LABEL: multiuse_fneg_2_vop2_users_f32:
2504 ; GCN:       ; %bb.0:
2505 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2506 ; GCN-NEXT:    v_mul_f32_e64 v3, -v0, v1
2507 ; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
2508 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
2509 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2510   %fneg.a = fneg float %a
2511   %mul0 = fmul float %fneg.a, %b
2512   %mul1 = fmul float %fneg.a, %c
2513   %insert.0 = insertvalue { float, float } poison, float %mul0, 0
2514   %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2515   ret { float, float } %insert.1
2516 }
2517
2518 ; One user is VOP3 so has no cost to folding the modifier, the other does.
2519 define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, float %c) #0 {
2520 ; GCN-LABEL: multiuse_fneg_vop2_vop3_users_f32:
2521 ; GCN:       ; %bb.0:
2522 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; GCN-NEXT:    v_fma_f32 v3, -v0, v1, 2.0
2524 ; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
2525 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
2526 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2527   %fneg.a = fneg float %a
2528   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2529   %mul1 = fmul float %fneg.a, %c
2530
2531   %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2532   %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2533   ret { float, float } %insert.1
2534 }
2535
2536 ; The use of the fneg requires a code size increase, but folding into
2537 ; the source does not
2538 define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, float %a, float %b, float %c, float %d) #0 {
2539 ; GCN-SAFE-LABEL: free_fold_src_code_size_cost_use_f32:
2540 ; GCN-SAFE:       ; %bb.0:
2541 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2542 ; GCN-SAFE-NEXT:    v_fma_f32 v1, v2, v3, 2.0
2543 ; GCN-SAFE-NEXT:    v_mul_f32_e64 v0, -v1, v4
2544 ; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v1, v5
2545 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
2546 ;
2547 ; GCN-NSZ-LABEL: free_fold_src_code_size_cost_use_f32:
2548 ; GCN-NSZ:       ; %bb.0:
2549 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550 ; GCN-NSZ-NEXT:    v_fma_f32 v1, v2, -v3, -2.0
2551 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, v1, v4
2552 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, v1, v5
2553 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
2554   %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2555   %fneg.fma0 = fneg float %fma0
2556   %mul1 = fmul float %fneg.fma0, %c
2557   %mul2 = fmul float %fneg.fma0, %d
2558
2559   %insert.0 = insertvalue { float, float } poison, float %mul1, 0
2560   %insert.1 = insertvalue { float, float } %insert.0, float %mul2, 1
2561   ret { float, float } %insert.1
2562 }
2563
2564 define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, double %b, double %c, double %d) #0 {
2565 ; GCN-LABEL: free_fold_src_code_size_cost_use_f64:
2566 ; GCN:       ; %bb.0:
2567 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2568 ; GCN-NEXT:    v_fma_f64 v[2:3], v[0:1], v[2:3], 2.0
2569 ; GCN-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[4:5]
2570 ; GCN-NEXT:    v_mul_f64 v[2:3], -v[2:3], v[6:7]
2571 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2572   %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2573   %fneg.fma0 = fneg double %fma0
2574   %mul1 = fmul double %fneg.fma0, %c
2575   %mul2 = fmul double %fneg.fma0, %d
2576
2577   %insert.0 = insertvalue { double, double } poison, double %mul1, 0
2578   %insert.1 = insertvalue { double, double } %insert.0, double %mul2, 1
2579   ret { double, double } %insert.1
2580 }
2581
2582 ; %trunc.a has one fneg use, but it requires a code size increase and
2583 ; %the fneg can instead be folded for free into the fma.
2584 define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, float %d) #0 {
2585 ; GCN-LABEL: one_use_cost_to_fold_into_src_f32:
2586 ; GCN:       ; %bb.0:
2587 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588 ; GCN-NEXT:    v_trunc_f32_e32 v0, v0
2589 ; GCN-NEXT:    v_fma_f32 v0, -v0, v1, v2
2590 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2591   %trunc.a = call float @llvm.trunc.f32(float %a)
2592   %trunc.fneg.a = fneg float %trunc.a
2593   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2594   ret float %fma0
2595 }
2596
2597 define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, float %c, float %d) #0 {
2598 ; GCN-LABEL: multi_use_cost_to_fold_into_src:
2599 ; GCN:       ; %bb.0:
2600 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601 ; GCN-NEXT:    v_trunc_f32_e32 v4, v0
2602 ; GCN-NEXT:    v_fma_f32 v0, -v4, v1, v2
2603 ; GCN-NEXT:    v_mul_f32_e32 v1, v4, v3
2604 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2605   %trunc.a = call float @llvm.trunc.f32(float %a)
2606   %trunc.fneg.a = fneg float %trunc.a
2607   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2608   %mul1 = fmul float %trunc.a, %d
2609   %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2610   %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2611   ret { float, float } %insert.1
2612 }
2613
2614 ; The AMDGPU combine to pull fneg into the FMA operands was being
2615 ; undone by the generic combine to pull the fneg out of the fma if
2616 ; !isFNegFree. We were reporting false for v2f32 even though it will
2617 ; be split into f32 where it will be free.
2618 define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
2619 ; GCN-LABEL: fneg_fma_fneg_dagcombine_loop:
2620 ; GCN:       ; %bb.0: ; %bb
2621 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2622 ; GCN-NEXT:    s_brev_b32 s4, 1
2623 ; GCN-NEXT:    v_fma_f32 v3, v3, -v5, s4
2624 ; GCN-NEXT:    v_fma_f32 v2, v2, -v4, s4
2625 ; GCN-NEXT:    v_sub_f32_e32 v1, v3, v1
2626 ; GCN-NEXT:    v_sub_f32_e32 v0, v2, v0
2627 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v4
2628 ; GCN-NEXT:    v_mul_f32_e32 v1, v1, v5
2629 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2630 bb:
2631   %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
2632   %i4 = fadd fast <2 x float> %i3, %arg
2633   %i5 = fneg <2 x float> %i4
2634   %i6 = fmul fast <2 x float> %i5, %arg2
2635   ret <2 x float> %i6
2636 }
2637
2638 ; This expects denormal flushing, so can't turn this fmul into fneg
2639 ; TODO: Keeping this as fmul saves encoding size
2640 define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2641 ; GCN-LABEL: nnan_fmul_neg1_to_fneg:
2642 ; GCN:       ; %bb.0:
2643 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2644 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2645 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2646   %mul = fmul float %x, -1.0
2647   %add = fmul nnan float %mul, %y
2648   ret float %add
2649 }
2650
2651 ; It's legal to turn this fmul into an fneg since denormals are
2652 ; preserved and we know an snan can't happen from the flag.
2653 define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
2654 ; GCN-LABEL: denormal_fmul_neg1_to_fneg:
2655 ; GCN:       ; %bb.0:
2656 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2657 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2658 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2659   %mul = fmul nnan float %x, -1.0
2660   %add = fmul float %mul, %y
2661   ret float %add
2662 }
2663
2664 ; know the source can't be an snan
2665 define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
2666 ; GCN-LABEL: denorm_snan_fmul_neg1_to_fneg:
2667 ; GCN:       ; %bb.0:
2668 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2669 ; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v0
2670 ; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
2671 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2672   %canonical = fmul float %x, %x
2673   %mul = fmul float %canonical, -1.0
2674   %add = fmul float %mul, %y
2675   ret float %add
2676 }
2677
2678 define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2679 ; GCN-LABEL: flush_snan_fmul_neg1_to_fneg:
2680 ; GCN:       ; %bb.0:
2681 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2682 ; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
2683 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2684 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2685   %quiet = call float @llvm.canonicalize.f32(float %x)
2686   %mul = fmul float %quiet, -1.0
2687   %add = fmul float %mul, %y
2688   ret float %add
2689 }
2690
2691 define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
2692 ; GCN-LABEL: fadd_select_fneg_fneg_f32:
2693 ; GCN:       ; %bb.0:
2694 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2695 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2696 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2697 ; GCN-NEXT:    v_sub_f32_e32 v0, v3, v0
2698 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2699   %cmp = icmp eq i32 %arg0, 0
2700   %neg.x = fneg float %x
2701   %neg.y  = fneg float %y
2702   %select = select i1 %cmp, float %neg.x, float %neg.y
2703   %add = fadd float %select, %z
2704   ret float %add
2705 }
2706
2707 define double @fadd_select_fneg_fneg_f64(i32 %arg0, double %x, double %y, double %z) {
2708 ; GCN-LABEL: fadd_select_fneg_fneg_f64:
2709 ; GCN:       ; %bb.0:
2710 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2711 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2712 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
2713 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
2714 ; GCN-NEXT:    v_add_f64 v[0:1], v[5:6], -v[1:2]
2715 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2716   %cmp = icmp eq i32 %arg0, 0
2717   %neg.x = fneg double %x
2718   %neg.y  = fneg double %y
2719   %select = select i1 %cmp, double %neg.x, double %neg.y
2720   %add = fadd double %select, %z
2721   ret double %add
2722 }
2723
2724 define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
2725 ; SI-LABEL: fadd_select_fneg_fneg_f16:
2726 ; SI:       ; %bb.0:
2727 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2728 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2729 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2730 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2731 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2732 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2733 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2734 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
2735 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2736 ; SI-NEXT:    v_sub_f32_e32 v0, v3, v0
2737 ; SI-NEXT:    s_setpc_b64 s[30:31]
2738 ;
2739 ; VI-LABEL: fadd_select_fneg_fneg_f16:
2740 ; VI:       ; %bb.0:
2741 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2742 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2743 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2744 ; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
2745 ; VI-NEXT:    s_setpc_b64 s[30:31]
2746   %cmp = icmp eq i32 %arg0, 0
2747   %neg.x = fneg half %x
2748   %neg.y = fneg half %y
2749   %select = select i1 %cmp, half %neg.x, half %neg.y
2750   %add = fadd half %select, %z
2751   ret half %add
2752 }
2753
2754 ; FIXME: Terrible code for SI
2755 define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2756 ; SI-LABEL: fadd_select_fneg_fneg_v2f16:
2757 ; SI:       ; %bb.0:
2758 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2759 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2760 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2761 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2762 ; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
2763 ; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2764 ; SI-NEXT:    v_or_b32_e32 v1, v1, v2
2765 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v4
2766 ; SI-NEXT:    v_cvt_f16_f32_e32 v4, v6
2767 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2768 ; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2769 ; SI-NEXT:    v_or_b32_e32 v2, v3, v2
2770 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2771 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
2772 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v4
2773 ; SI-NEXT:    v_cvt_f32_f16_e32 v4, v5
2774 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2775 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2776 ; SI-NEXT:    v_sub_f32_e32 v0, v4, v0
2777 ; SI-NEXT:    v_sub_f32_e32 v1, v3, v1
2778 ; SI-NEXT:    s_setpc_b64 s[30:31]
2779 ;
2780 ; VI-LABEL: fadd_select_fneg_fneg_v2f16:
2781 ; VI:       ; %bb.0:
2782 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2783 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2784 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2785 ; VI-NEXT:    v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2786 ; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
2787 ; VI-NEXT:    v_or_b32_e32 v0, v0, v1
2788 ; VI-NEXT:    s_setpc_b64 s[30:31]
2789   %cmp = icmp eq i32 %arg0, 0
2790   %neg.x = fneg <2 x half> %x
2791   %neg.y = fneg <2 x half> %y
2792   %select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y
2793   %add = fadd <2 x half> %select, %z
2794   ret <2 x half> %add
2795 }
2796
2797 ; --------------------------------------------------------------------------------
2798 ; select tests
2799 ; --------------------------------------------------------------------------------
2800
2801 define amdgpu_kernel void @s_fneg_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
2802 ; SI-LABEL: s_fneg_select_infloop_regression_f32:
2803 ; SI:       ; %bb.0:
2804 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
2805 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
2806 ; SI-NEXT:    s_bitcmp1_b32 s1, 0
2807 ; SI-NEXT:    v_mov_b32_e32 v0, s0
2808 ; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
2809 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
2810 ; SI-NEXT:    v_cndmask_b32_e64 v2, -v0, 0, s[0:1]
2811 ; SI-NEXT:    v_mov_b32_e32 v0, s2
2812 ; SI-NEXT:    v_mov_b32_e32 v1, s3
2813 ; SI-NEXT:    flat_store_dword v[0:1], v2
2814 ; SI-NEXT:    s_endpgm
2815 ;
2816 ; VI-LABEL: s_fneg_select_infloop_regression_f32:
2817 ; VI:       ; %bb.0:
2818 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
2819 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
2820 ; VI-NEXT:    s_bitcmp1_b32 s1, 0
2821 ; VI-NEXT:    v_mov_b32_e32 v0, s0
2822 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
2823 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
2824 ; VI-NEXT:    v_cndmask_b32_e64 v2, -v0, 0, s[0:1]
2825 ; VI-NEXT:    v_mov_b32_e32 v0, s2
2826 ; VI-NEXT:    v_mov_b32_e32 v1, s3
2827 ; VI-NEXT:    flat_store_dword v[0:1], v2
2828 ; VI-NEXT:    s_endpgm
2829   %i = select i1 %arg1, float 0.0, float %arg
2830   %i2 = fneg float %i
2831   %i3 = select i1 %arg1, float 0.0, float %i2
2832   store float %i3, ptr addrspace(1) %ptr, align 4
2833   ret void
2834 }
2835
2836 define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
2837 ; GCN-LABEL: v_fneg_select_infloop_regression_f32:
2838 ; GCN:       ; %bb.0:
2839 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2840 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2841 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2842 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
2843 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
2844 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2845   %i = select i1 %arg1, float 0.0, float %arg
2846   %i2 = fneg float %i
2847   %i3 = select i1 %arg1, float 0.0, float %i2
2848   ret float %i3
2849 }
2850
2851 define float @v_fneg_select_infloop_regression_f32_commute0(float %arg, i1 %arg1) {
2852 ; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute0:
2853 ; GCN:       ; %bb.0:
2854 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2855 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2856 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2857 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
2858 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
2859 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2860   %i = select i1 %arg1, float %arg, float 0.0
2861   %i2 = fneg float %i
2862   %i3 = select i1 %arg1, float 0.0, float %i2
2863   ret float %i3
2864 }
2865
2866 define float @v_fneg_select_infloop_regression_f32_commute1(float %arg, i1 %arg1) {
2867 ; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute1:
2868 ; GCN:       ; %bb.0:
2869 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2871 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2872 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
2873 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
2874 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2875   %i = select i1 %arg1, float 0.0, float %arg
2876   %i2 = fneg float %i
2877   %i3 = select i1 %arg1, float %i2, float 0.0
2878   ret float %i3
2879 }
2880
2881 define float @v_fneg_select_infloop_regression_f32_commute2(float %arg, i1 %arg1) {
2882 ; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute2:
2883 ; GCN:       ; %bb.0:
2884 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2885 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2886 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2887 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
2888 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
2889 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2890   %i = select i1 %arg1, float %arg, float 0.0
2891   %i2 = fneg float %i
2892   %i3 = select i1 %arg1, float %i2, float 0.0
2893   ret float %i3
2894 }
2895
2896 ; Check with an inline constant that's equally cheap to negate
2897 define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %arg1) {
2898 ; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32:
2899 ; GCN:       ; %bb.0:
2900 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2901 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2902 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2903 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, vcc
2904 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 2.0, vcc
2905 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2906   %i = select i1 %arg1, float 2.0, float %arg
2907   %i2 = fneg float %i
2908   %i3 = select i1 %arg1, float 2.0, float %i2
2909   ret float %i3
2910 }
2911
2912 define float @v_fneg_select_infloop_regression_inline_imm_f32_commute0(float %arg, i1 %arg1) {
2913 ; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute0:
2914 ; GCN:       ; %bb.0:
2915 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2916 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2917 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2918 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
2919 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 2.0, vcc
2920 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2921   %i = select i1 %arg1, float %arg, float 2.0
2922   %i2 = fneg float %i
2923   %i3 = select i1 %arg1, float 2.0, float %i2
2924   ret float %i3
2925 }
2926
2927 define float @v_fneg_select_infloop_regression_inline_imm_f32_commute1(float %arg, i1 %arg1) {
2928 ; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute1:
2929 ; GCN:       ; %bb.0:
2930 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2931 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2932 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2933 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, vcc
2934 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v0, vcc
2935 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2936   %i = select i1 %arg1, float 2.0, float %arg
2937   %i2 = fneg float %i
2938   %i3 = select i1 %arg1, float %i2, float 2.0
2939   ret float %i3
2940 }
2941
2942 define float @v_fneg_select_infloop_regression_inline_imm_f32_commute2(float %arg, i1 %arg1) {
2943 ; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute2:
2944 ; GCN:       ; %bb.0:
2945 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2946 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2947 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2948 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
2949 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v0, vcc
2950 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2951   %i = select i1 %arg1, float %arg, float 2.0
2952   %i2 = fneg float %i
2953   %i3 = select i1 %arg1, float %i2, float 2.0
2954   ret float %i3
2955 }
2956
2957 ; Check with an inline constant that's equally cheap to negate
2958 define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 %arg1) {
2959 ; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32:
2960 ; GCN:       ; %bb.0:
2961 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2962 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2963 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2964 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -2.0, vcc
2965 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, -2.0, vcc
2966 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2967   %i = select i1 %arg1, float -2.0, float %arg
2968   %i2 = fneg float %i
2969   %i3 = select i1 %arg1, float -2.0, float %i2
2970   ret float %i3
2971 }
2972
2973 define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0(float %arg, i1 %arg1) {
2974 ; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0:
2975 ; GCN:       ; %bb.0:
2976 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2978 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2979 ; GCN-NEXT:    v_cndmask_b32_e32 v0, -2.0, v0, vcc
2980 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, -2.0, vcc
2981 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2982   %i = select i1 %arg1, float %arg, float -2.0
2983   %i2 = fneg float %i
2984   %i3 = select i1 %arg1, float -2.0, float %i2
2985   ret float %i3
2986 }
2987
2988 define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1(float %arg, i1 %arg1) {
2989 ; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1:
2990 ; GCN:       ; %bb.0:
2991 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2992 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2993 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2994 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -2.0, vcc
2995 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -2.0, -v0, vcc
2996 ; GCN-NEXT:    s_setpc_b64 s[30:31]
2997   %i = select i1 %arg1, float -2.0, float %arg
2998   %i2 = fneg float %i
2999   %i3 = select i1 %arg1, float %i2, float -2.0
3000   ret float %i3
3001 }
3002
3003 define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2(float %arg, i1 %arg1) {
3004 ; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2:
3005 ; GCN:       ; %bb.0:
3006 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3007 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3008 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3009 ; GCN-NEXT:    v_cndmask_b32_e32 v0, -2.0, v0, vcc
3010 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -2.0, -v0, vcc
3011 ; GCN-NEXT:    s_setpc_b64 s[30:31]
3012   %i = select i1 %arg1, float %arg, float -2.0
3013   %i2 = fneg float %i
3014   %i3 = select i1 %arg1, float %i2, float -2.0
3015   ret float %i3
3016 }
3017
3018 define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3019 ; SI-LABEL: s_fneg_select_infloop_regression_f64:
3020 ; SI:       ; %bb.0:
3021 ; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
3022 ; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
3023 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
3024 ; SI-NEXT:    v_bfrev_b32_e32 v0, 1
3025 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3026 ; SI-NEXT:    s_bitcmp1_b32 s4, 0
3027 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
3028 ; SI-NEXT:    v_mov_b32_e32 v1, s3
3029 ; SI-NEXT:    s_and_b64 s[6:7], s[4:5], exec
3030 ; SI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
3031 ; SI-NEXT:    s_cselect_b32 s2, 0, s2
3032 ; SI-NEXT:    v_mov_b32_e32 v3, s1
3033 ; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[4:5]
3034 ; SI-NEXT:    v_mov_b32_e32 v0, s2
3035 ; SI-NEXT:    v_mov_b32_e32 v2, s0
3036 ; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3037 ; SI-NEXT:    s_endpgm
3038 ;
3039 ; VI-LABEL: s_fneg_select_infloop_regression_f64:
3040 ; VI:       ; %bb.0:
3041 ; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
3042 ; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
3043 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
3044 ; VI-NEXT:    v_bfrev_b32_e32 v0, 1
3045 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3046 ; VI-NEXT:    s_bitcmp1_b32 s4, 0
3047 ; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
3048 ; VI-NEXT:    v_mov_b32_e32 v1, s3
3049 ; VI-NEXT:    s_and_b64 s[6:7], s[4:5], exec
3050 ; VI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
3051 ; VI-NEXT:    s_cselect_b32 s2, 0, s2
3052 ; VI-NEXT:    v_mov_b32_e32 v3, s1
3053 ; VI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[4:5]
3054 ; VI-NEXT:    v_mov_b32_e32 v0, s2
3055 ; VI-NEXT:    v_mov_b32_e32 v2, s0
3056 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3057 ; VI-NEXT:    s_endpgm
3058   %i = select i1 %arg1, double 0.0, double %arg
3059   %i2 = fneg double %i
3060   %i3 = select i1 %arg1, double 0.0, double %i2
3061   store double %i3, ptr addrspace(1) %ptr, align 4
3062   ret void
3063 }
3064
3065 define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
3066 ; GCN-LABEL: v_fneg_select_infloop_regression_f64:
3067 ; GCN:       ; %bb.0:
3068 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3069 ; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
3070 ; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
3071 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3072 ; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
3073 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3074 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
3075 ; GCN-NEXT:    s_setpc_b64 s[30:31]
3076   %i = select i1 %arg1, double 0.0, double %arg
3077   %i2 = fneg double %i
3078   %i3 = select i1 %arg1, double 0.0, double %i2
3079   ret double %i3
3080 }
3081
3082 define amdgpu_kernel void @s_fneg_select_infloop_regression_f16(half %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3083 ; SI-LABEL: s_fneg_select_infloop_regression_f16:
3084 ; SI:       ; %bb.0:
3085 ; SI-NEXT:    s_load_dword s2, s[0:1], 0x9
3086 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
3087 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3088 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
3089 ; SI-NEXT:    s_bitcmp1_b32 s2, 16
3090 ; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3091 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[2:3]
3092 ; SI-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, s[2:3]
3093 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v0
3094 ; SI-NEXT:    v_mov_b32_e32 v0, s0
3095 ; SI-NEXT:    v_mov_b32_e32 v1, s1
3096 ; SI-NEXT:    flat_store_short v[0:1], v2
3097 ; SI-NEXT:    s_endpgm
3098 ;
3099 ; VI-LABEL: s_fneg_select_infloop_regression_f16:
3100 ; VI:       ; %bb.0:
3101 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x24
3102 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
3103 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3104 ; VI-NEXT:    s_bitcmp1_b32 s2, 16
3105 ; VI-NEXT:    v_mov_b32_e32 v0, s2
3106 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3107 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[2:3]
3108 ; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3109 ; VI-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s[2:3]
3110 ; VI-NEXT:    v_mov_b32_e32 v0, s0
3111 ; VI-NEXT:    v_mov_b32_e32 v1, s1
3112 ; VI-NEXT:    flat_store_short v[0:1], v2
3113 ; VI-NEXT:    s_endpgm
3114   %i = select i1 %arg1, half 0.0, half %arg
3115   %i2 = fneg half %i
3116   %i3 = select i1 %arg1, half 0.0, half %i2
3117   store half %i3, ptr addrspace(1) %ptr, align 4
3118   ret void
3119 }
3120
3121 define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) {
3122 ; SI-LABEL: v_fneg_select_infloop_regression_f16:
3123 ; SI:       ; %bb.0:
3124 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3125 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
3126 ; SI-NEXT:    v_and_b32_e32 v1, 1, v1
3127 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3128 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3129 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3130 ; SI-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
3131 ; SI-NEXT:    s_setpc_b64 s[30:31]
3132 ;
3133 ; VI-LABEL: v_fneg_select_infloop_regression_f16:
3134 ; VI:       ; %bb.0:
3135 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3136 ; VI-NEXT:    v_and_b32_e32 v1, 1, v1
3137 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3138 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3139 ; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3140 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3141 ; VI-NEXT:    s_setpc_b64 s[30:31]
3142   %i = select i1 %arg1, half 0.0, half %arg
3143   %i2 = fneg half %i
3144   %i3 = select i1 %arg1, half 0.0, half %i2
3145   ret half %i3
3146 }
3147
3148 define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3149 ; SI-LABEL: s_fneg_select_infloop_regression_v2f16:
3150 ; SI:       ; %bb.0:
3151 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
3152 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3153 ; SI-NEXT:    s_and_b32 s1, 1, s1
3154 ; SI-NEXT:    s_cselect_b32 s0, 0, s0
3155 ; SI-NEXT:    s_xor_b32 s0, s0, 0x80008000
3156 ; SI-NEXT:    s_cmp_eq_u32 s1, 1
3157 ; SI-NEXT:    s_cselect_b32 s0, 0, s0
3158 ; SI-NEXT:    v_mov_b32_e32 v0, s2
3159 ; SI-NEXT:    v_mov_b32_e32 v1, s3
3160 ; SI-NEXT:    v_mov_b32_e32 v2, s0
3161 ; SI-NEXT:    flat_store_dword v[0:1], v2
3162 ; SI-NEXT:    s_endpgm
3163 ;
3164 ; VI-LABEL: s_fneg_select_infloop_regression_v2f16:
3165 ; VI:       ; %bb.0:
3166 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
3167 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3168 ; VI-NEXT:    s_and_b32 s1, 1, s1
3169 ; VI-NEXT:    s_cselect_b32 s0, 0, s0
3170 ; VI-NEXT:    s_xor_b32 s0, s0, 0x80008000
3171 ; VI-NEXT:    s_cmp_eq_u32 s1, 1
3172 ; VI-NEXT:    s_cselect_b32 s0, 0, s0
3173 ; VI-NEXT:    v_mov_b32_e32 v0, s2
3174 ; VI-NEXT:    v_mov_b32_e32 v1, s3
3175 ; VI-NEXT:    v_mov_b32_e32 v2, s0
3176 ; VI-NEXT:    flat_store_dword v[0:1], v2
3177 ; VI-NEXT:    s_endpgm
3178   %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
3179   %i2 = fneg <2 x half> %i
3180   %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
3181   store <2 x half> %i3, ptr addrspace(1) %ptr, align 4
3182   ret void
3183 }
3184
3185 define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1) {
3186 ; SI-LABEL: v_fneg_select_infloop_regression_v2f16:
3187 ; SI:       ; %bb.0:
3188 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3189 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
3190 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
3191 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3192 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
3193 ; SI-NEXT:    v_and_b32_e32 v1, 1, v2
3194 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3195 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3196 ; SI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
3197 ; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, vcc
3198 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
3199 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3200 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
3201 ; SI-NEXT:    s_setpc_b64 s[30:31]
3202 ;
3203 ; VI-LABEL: v_fneg_select_infloop_regression_v2f16:
3204 ; VI:       ; %bb.0:
3205 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3206 ; VI-NEXT:    v_and_b32_e32 v1, 1, v1
3207 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3208 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3209 ; VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
3210 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3211 ; VI-NEXT:    s_setpc_b64 s[30:31]
3212   %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
3213   %i2 = fneg <2 x half> %i
3214   %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
3215   ret <2 x half> %i3
3216 }
3217
3218 define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3219 ; SI-LABEL: s_fneg_select_infloop_regression_v2f32:
3220 ; SI:       ; %bb.0:
3221 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
3222 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
3223 ; SI-NEXT:    v_bfrev_b32_e32 v0, 1
3224 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3225 ; SI-NEXT:    s_bitcmp1_b32 s6, 0
3226 ; SI-NEXT:    v_mov_b32_e32 v1, s4
3227 ; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3228 ; SI-NEXT:    v_cndmask_b32_e64 v2, -v1, v0, s[2:3]
3229 ; SI-NEXT:    v_mov_b32_e32 v1, s5
3230 ; SI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[2:3]
3231 ; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[2:3]
3232 ; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, s[2:3]
3233 ; SI-NEXT:    v_mov_b32_e32 v3, s1
3234 ; SI-NEXT:    v_mov_b32_e32 v2, s0
3235 ; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3236 ; SI-NEXT:    s_endpgm
3237 ;
3238 ; VI-LABEL: s_fneg_select_infloop_regression_v2f32:
3239 ; VI:       ; %bb.0:
3240 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
3241 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
3242 ; VI-NEXT:    v_bfrev_b32_e32 v0, 1
3243 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3244 ; VI-NEXT:    s_bitcmp1_b32 s6, 0
3245 ; VI-NEXT:    v_mov_b32_e32 v1, s4
3246 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3247 ; VI-NEXT:    v_cndmask_b32_e64 v2, -v1, v0, s[2:3]
3248 ; VI-NEXT:    v_mov_b32_e32 v1, s5
3249 ; VI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[2:3]
3250 ; VI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[2:3]
3251 ; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, s[2:3]
3252 ; VI-NEXT:    v_mov_b32_e32 v3, s1
3253 ; VI-NEXT:    v_mov_b32_e32 v2, s0
3254 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3255 ; VI-NEXT:    s_endpgm
3256   %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
3257   %i2 = fneg <2 x float> %i
3258   %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
3259   store <2 x float> %i3, ptr addrspace(1) %ptr, align 4
3260   ret void
3261 }
3262
3263 define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1) {
3264 ; GCN-LABEL: v_fneg_select_infloop_regression_v2f32:
3265 ; GCN:       ; %bb.0:
3266 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3267 ; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
3268 ; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
3269 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3270 ; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
3271 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, v3, vcc
3272 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3273 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
3274 ; GCN-NEXT:    s_setpc_b64 s[30:31]
3275   %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
3276   %i2 = fneg <2 x float> %i
3277   %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
3278   ret <2 x float> %i3
3279 }
3280
3281 define amdgpu_kernel void @s_fabs_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3282 ; SI-LABEL: s_fabs_select_infloop_regression_f32:
3283 ; SI:       ; %bb.0:
3284 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
3285 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3286 ; SI-NEXT:    s_bitcmp1_b32 s1, 0
3287 ; SI-NEXT:    v_mov_b32_e32 v0, s0
3288 ; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3289 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3290 ; SI-NEXT:    v_cndmask_b32_e64 v2, |v0|, 0, s[0:1]
3291 ; SI-NEXT:    v_mov_b32_e32 v0, s2
3292 ; SI-NEXT:    v_mov_b32_e32 v1, s3
3293 ; SI-NEXT:    flat_store_dword v[0:1], v2
3294 ; SI-NEXT:    s_endpgm
3295 ;
3296 ; VI-LABEL: s_fabs_select_infloop_regression_f32:
3297 ; VI:       ; %bb.0:
3298 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
3299 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3300 ; VI-NEXT:    s_bitcmp1_b32 s1, 0
3301 ; VI-NEXT:    v_mov_b32_e32 v0, s0
3302 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3303 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3304 ; VI-NEXT:    v_cndmask_b32_e64 v2, |v0|, 0, s[0:1]
3305 ; VI-NEXT:    v_mov_b32_e32 v0, s2
3306 ; VI-NEXT:    v_mov_b32_e32 v1, s3
3307 ; VI-NEXT:    flat_store_dword v[0:1], v2
3308 ; VI-NEXT:    s_endpgm
3309   %i = select i1 %arg1, float 0.0, float %arg
3310   %i2 = call float @llvm.fabs.f32(float %i)
3311   %i3 = select i1 %arg1, float 0.0, float %i2
3312   store float %i3, ptr addrspace(1) %ptr, align 4
3313   ret void
3314 }
3315
3316 define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
3317 ; GCN-LABEL: v_fabs_select_infloop_regression_f32:
3318 ; GCN:       ; %bb.0:
3319 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3320 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3321 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3322 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3323 ; GCN-NEXT:    v_cndmask_b32_e64 v0, |v0|, 0, vcc
3324 ; GCN-NEXT:    s_setpc_b64 s[30:31]
3325   %i = select i1 %arg1, float 0.0, float %arg
3326   %i2 = call float @llvm.fabs.f32(float %i)
3327   %i3 = select i1 %arg1, float 0.0, float %i2
3328   ret float %i3
3329 }
3330
3331 define amdgpu_kernel void @s_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3332 ; SI-LABEL: s_fneg_fabs_select_infloop_regression:
3333 ; SI:       ; %bb.0:
3334 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
3335 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
3336 ; SI-NEXT:    s_bitcmp1_b32 s1, 0
3337 ; SI-NEXT:    v_mov_b32_e32 v0, s0
3338 ; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3339 ; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3340 ; SI-NEXT:    v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
3341 ; SI-NEXT:    v_mov_b32_e32 v0, s2
3342 ; SI-NEXT:    v_mov_b32_e32 v1, s3
3343 ; SI-NEXT:    flat_store_dword v[0:1], v2
3344 ; SI-NEXT:    s_endpgm
3345 ;
3346 ; VI-LABEL: s_fneg_fabs_select_infloop_regression:
3347 ; VI:       ; %bb.0:
3348 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
3349 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
3350 ; VI-NEXT:    s_bitcmp1_b32 s1, 0
3351 ; VI-NEXT:    v_mov_b32_e32 v0, s0
3352 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3353 ; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3354 ; VI-NEXT:    v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
3355 ; VI-NEXT:    v_mov_b32_e32 v0, s2
3356 ; VI-NEXT:    v_mov_b32_e32 v1, s3
3357 ; VI-NEXT:    flat_store_dword v[0:1], v2
3358 ; VI-NEXT:    s_endpgm
3359   %i = select i1 %arg1, float 0.0, float %arg
3360   %i2 = call float @llvm.fabs.f32(float %i)
3361   %neg.i2 = fneg float %i2
3362   %i3 = select i1 %arg1, float 0.0, float %neg.i2
3363   store float %i3, ptr addrspace(1) %ptr, align 4
3364   ret void
3365 }
3366
3367 define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
3368 ; GCN-LABEL: v_fneg_fabs_select_infloop_regression:
3369 ; GCN:       ; %bb.0:
3370 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3371 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3372 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3373 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3374 ; GCN-NEXT:    v_cndmask_b32_e64 v0, -|v0|, 0, vcc
3375 ; GCN-NEXT:    s_setpc_b64 s[30:31]
3376   %i = select i1 %arg1, float 0.0, float %arg
3377   %i2 = call float @llvm.fabs.f32(float %i)
3378   %neg.i2 = fneg float %i2
3379   %i3 = select i1 %arg1, float 0.0, float %neg.i2
3380   ret float %i3
3381 }
3382
3383 define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
3384 ; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
3385 ; GCN-SAFE:       ; %bb.0: ; %bb
3386 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3387 ; GCN-SAFE-NEXT:    v_mul_f32_e32 v0, 0, v0
3388 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, 0, v0
3389 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
3390 ;
3391 ; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
3392 ; GCN-NSZ:       ; %bb.0: ; %bb
3393 ; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3394 ; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
3395 ; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
3396 bb:
3397   %i = fmul float %arg, 0.0
3398   %i1 = fsub float 0.0, %i
3399   ret float %i1
3400 }
3401
3402 declare i32 @llvm.amdgcn.workitem.id.x() #1
3403 declare float @llvm.fma.f32(float, float, float) #1
3404 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
3405 declare float @llvm.fmuladd.f32(float, float, float) #1
3406 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
3407 declare float @llvm.fabs.f32(float) #1
3408 declare float @llvm.sin.f32(float) #1
3409 declare float @llvm.trunc.f32(float) #1
3410 declare float @llvm.round.f32(float) #1
3411 declare float @llvm.rint.f32(float) #1
3412 declare float @llvm.nearbyint.f32(float) #1
3413 declare float @llvm.roundeven.f32(float) #1
3414 declare float @llvm.canonicalize.f32(float) #1
3415 declare float @llvm.arithmetic.fence.f32(float) #1
3416 declare float @llvm.minnum.f32(float, float) #1
3417 declare float @llvm.maxnum.f32(float, float) #1
3418 declare double @llvm.minnum.f64(double, double) #1
3419 declare double @llvm.fma.f64(double, double, double) #1
3420
3421 declare float @llvm.amdgcn.sin.f32(float) #1
3422 declare float @llvm.amdgcn.rcp.f32(float) #1
3423 declare float @llvm.amdgcn.rcp.legacy(float) #1
3424 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
3425 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
3426 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
3427
3428 declare half @llvm.fma.f16(half, half, half) #1
3429 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
3430 declare half @llvm.fmuladd.f16(half, half, half) #1
3431 declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
3432 declare half @llvm.sin.f16(half) #1
3433 declare half @llvm.trunc.f16(half) #1
3434 declare half @llvm.round.f16(half) #1
3435 declare half @llvm.rint.f16(half) #1
3436 declare half @llvm.nearbyint.f16(half) #1
3437 declare half @llvm.canonicalize.f16(half) #1
3438 declare half @llvm.minnum.f16(half, half) #1
3439 declare half @llvm.maxnum.f16(half, half) #1
3440 declare half @llvm.amdgcn.sin.f16(half) #1
3441 declare half @llvm.amdgcn.rcp.f16(half) #1
3442
3443 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
3444 attributes #1 = { nounwind readnone }
3445 attributes #2 = { nounwind "unsafe-fp-math"="true" }
3446 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
3447 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }