llvm/test/CodeGen/PowerPC/fmf-propagation.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; REQUIRES: asserts
   3 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1                        | FileCheck %s --check-prefix=FMFDEBUG
   4 ; RUN: llc < %s -mtriple=powerpc64le                                                           | FileCheck %s --check-prefix=FMF
   5 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
   6 ; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
   7
   8 ; Test FP transforms using instruction/node-level fast-math-flags.
   9 ; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
  10 ; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
  11
   12 declare float @llvm.fma.f32(float, float, float) ; fused multiply-add intrinsic called by the fmul_fma_* tests
   13 declare float @llvm.sqrt.f32(float) ; square-root intrinsic called by the sqrt_* tests
  14
  15 ; X * Y + Z --> fma(X, Y, Z)
  16
   17 ; The contract flag on the fmul is checked too: a contract fadd alone must not form an fma.
  18
  19 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
  20 ; FMFDEBUG-NOT:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
  21 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
  22
   23 define float @fmul_fadd_contract1(float %x, float %y, float %z) { ; x*y+z where only the fadd is 'contract'
   24 ; FMF-LABEL: fmul_fadd_contract1:
   25 ; FMF:       # %bb.0:
   26 ; FMF-NEXT:    xsmulsp 0, 1, 2
   27 ; FMF-NEXT:    xsaddsp 1, 0, 3
   28 ; FMF-NEXT:    blr
   29 ;
   30 ; GLOBAL-LABEL: fmul_fadd_contract1:
   31 ; GLOBAL:       # %bb.0:
   32 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
   33 ; GLOBAL-NEXT:    fmr 1, 3
   34 ; GLOBAL-NEXT:    blr
   35   %mul = fmul float %x, %y ; no fast-math flags on the multiply
   36   %add = fadd contract float %mul, %z ; 'contract' on the add alone; the flag-only run expects separate multiply and add
   37   ret float %add
   38 }
  39
   40 ; The contract flag on the fadd is also checked: a contract fmul alone must not form an fma.
  41
  42 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
  43 ; FMFDEBUG-NOT:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
  44 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
  45
   46 define float @fmul_fadd_contract2(float %x, float %y, float %z) { ; x*y+z where only the fmul is 'contract'
   47 ; FMF-LABEL: fmul_fadd_contract2:
   48 ; FMF:       # %bb.0:
   49 ; FMF-NEXT:    xsmulsp 0, 1, 2
   50 ; FMF-NEXT:    xsaddsp 1, 0, 3
   51 ; FMF-NEXT:    blr
   52 ;
   53 ; GLOBAL-LABEL: fmul_fadd_contract2:
   54 ; GLOBAL:       # %bb.0:
   55 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
   56 ; GLOBAL-NEXT:    fmr 1, 3
   57 ; GLOBAL-NEXT:    blr
   58   %mul = fmul contract float %x, %y ; 'contract' on the multiply alone
   59   %add = fadd float %mul, %z ; no fast-math flags on the add; the flag-only run expects separate multiply and add
   60   ret float %add
   61 }
  62
  63 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
  64 ; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
  65 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'
  66
   67 define float @fmul_fadd_contract3(float %x, float %y, float %z) { ; x*y+z where both fmul and fadd are 'contract'
   68 ; FMF-LABEL: fmul_fadd_contract3:
   69 ; FMF:       # %bb.0:
   70 ; FMF-NEXT:    xsmaddasp 3, 1, 2
   71 ; FMF-NEXT:    fmr 1, 3
   72 ; FMF-NEXT:    blr
   73 ;
   74 ; GLOBAL-LABEL: fmul_fadd_contract3:
   75 ; GLOBAL:       # %bb.0:
   76 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
   77 ; GLOBAL-NEXT:    fmr 1, 3
   78 ; GLOBAL-NEXT:    blr
   79   %mul = fmul contract float %x, %y ; 'contract' on the multiply
   80   %add = fadd contract float %mul, %z ; 'contract' on the add as well; both runs expect one fused xsmaddasp
   81   ret float %add
   82 }
  83
  84 ; Reassociation does NOT imply that FMA contraction is allowed.
  85
  86 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
  87 ; FMFDEBUG-NOT:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
  88 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
  89
   90 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { ; x*y+z where only the fadd is 'reassoc'
   91 ; FMF-LABEL: fmul_fadd_reassoc1:
   92 ; FMF:       # %bb.0:
   93 ; FMF-NEXT:    xsmulsp 0, 1, 2
   94 ; FMF-NEXT:    xsaddsp 1, 0, 3
   95 ; FMF-NEXT:    blr
   96 ;
   97 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
   98 ; GLOBAL:       # %bb.0:
   99 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
  100 ; GLOBAL-NEXT:    fmr 1, 3
  101 ; GLOBAL-NEXT:    blr
  102   %mul = fmul float %x, %y ; no fast-math flags on the multiply
  103   %add = fadd reassoc float %mul, %z ; 'reassoc' only; the flag-only run expects separate multiply and add
  104   ret float %add
  105 }
 106
 107 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 108 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 109 ; FMFDEBUG-NOT:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
 110 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 111
  112 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) { ; x*y+z where both fmul and fadd are 'reassoc'
  113 ; FMF-LABEL: fmul_fadd_reassoc2:
  114 ; FMF:       # %bb.0:
  115 ; FMF-NEXT:    xsmulsp 0, 1, 2
  116 ; FMF-NEXT:    xsaddsp 1, 0, 3
  117 ; FMF-NEXT:    blr
  118 ;
  119 ; GLOBAL-LABEL: fmul_fadd_reassoc2:
  120 ; GLOBAL:       # %bb.0:
  121 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
  122 ; GLOBAL-NEXT:    fmr 1, 3
  123 ; GLOBAL-NEXT:    blr
  124   %mul = fmul reassoc float %x, %y ; 'reassoc' on the multiply
  125   %add = fadd reassoc float %mul, %z ; 'reassoc' on the add; the flag-only run still expects separate multiply and add
  126   ret float %add
  127 }
 128
 129 ; The fadd is now fully 'fast', but fmul is not yet.
 130
 131 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
 132 ; FMFDEBUG-NOT:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 133 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
 134
  135 define float @fmul_fadd_fast1(float %x, float %y, float %z) { ; x*y+z where only the fadd is 'fast'
  136 ; FMF-LABEL: fmul_fadd_fast1:
  137 ; FMF:       # %bb.0:
  138 ; FMF-NEXT:    xsmulsp 0, 1, 2
  139 ; FMF-NEXT:    xsaddsp 1, 0, 3
  140 ; FMF-NEXT:    blr
  141 ;
  142 ; GLOBAL-LABEL: fmul_fadd_fast1:
  143 ; GLOBAL:       # %bb.0:
  144 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
  145 ; GLOBAL-NEXT:    fmr 1, 3
  146 ; GLOBAL-NEXT:    blr
  147   %mul = fmul float %x, %y ; no fast-math flags on the multiply
  148   %add = fadd fast float %mul, %z ; 'fast' on the add alone; the flag-only run expects separate multiply and add
  149   ret float %add
  150 }
 151
 152 ; This implies that contraction is allowed - the intermediate fmul result is now also flagged.
 153
 154 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
 155 ; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 156 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
 157
  158 define float @fmul_fadd_fast2(float %x, float %y, float %z) { ; x*y+z where both fmul and fadd are 'fast'
  159 ; FMF-LABEL: fmul_fadd_fast2:
  160 ; FMF:       # %bb.0:
  161 ; FMF-NEXT:    xsmaddasp 3, 1, 2
  162 ; FMF-NEXT:    fmr 1, 3
  163 ; FMF-NEXT:    blr
  164 ;
  165 ; GLOBAL-LABEL: fmul_fadd_fast2:
  166 ; GLOBAL:       # %bb.0:
  167 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
  168 ; GLOBAL-NEXT:    fmr 1, 3
  169 ; GLOBAL-NEXT:    blr
  170   %mul = fmul fast float %x, %y ; 'fast' on the multiply
  171   %add = fadd fast float %mul, %z ; 'fast' on the add as well; both runs expect one fused xsmaddasp
  172   ret float %add
  173 }
 174
 175 ; fma(X, 7.0, X * 42.0) --> X * 49.0
 176 ; This is the minimum FMF needed for this transform - the FMA allows reassociation.
 177
 178 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
 179 ; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
 180 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 181
 182 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
 183 ; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 184 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 185
  186 define float @fmul_fma_reassoc1(float %x) { ; fma(x, 7.0, x*42.0) where only the fma call is 'reassoc'
  187 ; FMF-LABEL: fmul_fma_reassoc1:
  188 ; FMF:       # %bb.0:
  189 ; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
  190 ; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
  191 ; FMF-NEXT:    xsmulsp 1, 1, 0
  192 ; FMF-NEXT:    blr
  193 ;
  194 ; GLOBAL-LABEL: fmul_fma_reassoc1:
  195 ; GLOBAL:       # %bb.0:
  196 ; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
  197 ; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
  198 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  199 ; GLOBAL-NEXT:    blr
  200   %mul = fmul float %x, 42.0 ; no fast-math flags on the multiply
  201   %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) ; both runs expect a single multiply by a constant-pool value
  202   ret float %fma
  203 }
 204
 205 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 206
 207 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
 208 ; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
 209 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 210
 211 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
 212 ; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 213 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 214
  215 define float @fmul_fma_reassoc2(float %x) { ; fma(x, 7.0, x*42.0) where both the fmul and the fma call are 'reassoc'
  216 ; FMF-LABEL: fmul_fma_reassoc2:
  217 ; FMF:       # %bb.0:
  218 ; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
  219 ; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
  220 ; FMF-NEXT:    xsmulsp 1, 1, 0
  221 ; FMF-NEXT:    blr
  222 ;
  223 ; GLOBAL-LABEL: fmul_fma_reassoc2:
  224 ; GLOBAL:       # %bb.0:
  225 ; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
  226 ; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
  227 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  228 ; GLOBAL-NEXT:    blr
  229   %mul = fmul reassoc float %x, 42.0 ; 'reassoc' on the multiply too
  230   %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) ; both runs expect a single multiply by a constant-pool value
  231   ret float %fma
  232 }
 233
 234 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
 235
 236 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
 237 ; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 238 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 239
 240 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
 241 ; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 242 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 243
  244 define float @fmul_fma_fast1(float %x) { ; fma(x, 7.0, x*42.0) where only the fma call is 'fast'
  245 ; FMF-LABEL: fmul_fma_fast1:
  246 ; FMF:       # %bb.0:
  247 ; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
  248 ; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
  249 ; FMF-NEXT:    xsmulsp 1, 1, 0
  250 ; FMF-NEXT:    blr
  251 ;
  252 ; GLOBAL-LABEL: fmul_fma_fast1:
  253 ; GLOBAL:       # %bb.0:
  254 ; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
  255 ; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
  256 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  257 ; GLOBAL-NEXT:    blr
  258   %mul = fmul float %x, 42.0 ; no fast-math flags on the multiply
  259   %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul) ; both runs expect a single multiply by a constant-pool value
  260   ret float %fma
  261 }
 262
 263 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 264
 265 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
 266 ; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 267 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 268
 269 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
 270 ; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 271 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 272
  273 define float @fmul_fma_fast2(float %x) { ; fma(x, 7.0, x*42.0) where both the fmul and the fma call are 'fast'
  274 ; FMF-LABEL: fmul_fma_fast2:
  275 ; FMF:       # %bb.0:
  276 ; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
  277 ; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
  278 ; FMF-NEXT:    xsmulsp 1, 1, 0
  279 ; FMF-NEXT:    blr
  280 ;
  281 ; GLOBAL-LABEL: fmul_fma_fast2:
  282 ; GLOBAL:       # %bb.0:
  283 ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
  284 ; GLOBAL-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
  285 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  286 ; GLOBAL-NEXT:    blr
  287   %mul = fmul fast float %x, 42.0 ; 'fast' on the multiply too
  288   %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul) ; both runs expect a single multiply by a constant-pool value
  289   ret float %fma
  290 }
 291
 292 ; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
 293
 294 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
 295 ; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
 296 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
 297
 298 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
 299 ; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
 300 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
 301
  302 define float @sqrt_afn_ieee(float %x) #0 { ; 'afn ninf' sqrt under attribute group #0 ("denormal-fp-math"="ieee,ieee")
  303 ; FMF-LABEL: sqrt_afn_ieee:
  304 ; FMF:       # %bb.0:
  305 ; FMF-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
  306 ; FMF-NEXT:    xsabsdp 0, 1
  307 ; FMF-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
  308 ; FMF-NEXT:    fcmpu 0, 0, 2
  309 ; FMF-NEXT:    xxlxor 0, 0, 0
  310 ; FMF-NEXT:    blt 0, .LBB11_2
  311 ; FMF-NEXT:  # %bb.1:
  312 ; FMF-NEXT:    xsrsqrtesp 2, 1
  313 ; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
  314 ; FMF-NEXT:    vspltisw 2, -3
  315 ; FMF-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
  316 ; FMF-NEXT:    xsmulsp 1, 1, 2
  317 ; FMF-NEXT:    xsmulsp 0, 1, 0
  318 ; FMF-NEXT:    xsmulsp 1, 1, 2
  319 ; FMF-NEXT:    xvcvsxwdp 2, 34
  320 ; FMF-NEXT:    xsaddsp 1, 1, 2
  321 ; FMF-NEXT:    xsmulsp 0, 0, 1
  322 ; FMF-NEXT:  .LBB11_2:
  323 ; FMF-NEXT:    fmr 1, 0
  324 ; FMF-NEXT:    blr
  325 ;
  326 ; GLOBAL-LABEL: sqrt_afn_ieee:
  327 ; GLOBAL:       # %bb.0:
  328 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
  329 ; GLOBAL-NEXT:    xsabsdp 0, 1
  330 ; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
  331 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
  332 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
  333 ; GLOBAL-NEXT:    blt 0, .LBB11_2
  334 ; GLOBAL-NEXT:  # %bb.1:
  335 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  336 ; GLOBAL-NEXT:    vspltisw 2, -3
  337 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
  338 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
  339 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  340 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
  341 ; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
  342 ; GLOBAL-NEXT:    xsmulsp 0, 1, 0
  343 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  344 ; GLOBAL-NEXT:  .LBB11_2:
  345 ; GLOBAL-NEXT:    fmr 1, 0
  346 ; GLOBAL-NEXT:    blr
  347   %rt = call afn ninf float @llvm.sqrt.f32(float %x) ; both runs expect an xsrsqrtesp-based estimate sequence, guarded by a compare of |x| against a constant
  348   ret float %rt
  349 }
 350
  351 define float @sqrt_afn_ieee_inf(float %x) #0 { ; 'afn' sqrt without 'ninf' under attribute group #0
  352 ; FMF-LABEL: sqrt_afn_ieee_inf:
  353 ; FMF:       # %bb.0:
  354 ; FMF-NEXT:    xssqrtsp 1, 1
  355 ; FMF-NEXT:    blr
  356 ;
  357 ; GLOBAL-LABEL: sqrt_afn_ieee_inf:
  358 ; GLOBAL:       # %bb.0:
  359 ; GLOBAL-NEXT:    xssqrtsp 1, 1
  360 ; GLOBAL-NEXT:    blr
  361   %rt = call afn float @llvm.sqrt.f32(float %x) ; 'afn' alone: both runs expect the plain xssqrtsp instruction
  362   ret float %rt
  363 }
 364
 365 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 366 ; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
 367 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 368
 369 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 370 ; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
 371 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 372
  373 define float @sqrt_afn_preserve_sign(float %x) #1 { ; 'afn ninf' sqrt under attribute group #1 ("denormal-fp-math"="preserve-sign,preserve-sign")
  374 ; FMF-LABEL: sqrt_afn_preserve_sign:
  375 ; FMF:       # %bb.0:
  376 ; FMF-NEXT:    xxlxor 0, 0, 0
  377 ; FMF-NEXT:    fcmpu 0, 1, 0
  378 ; FMF-NEXT:    beq 0, .LBB13_2
  379 ; FMF-NEXT:  # %bb.1:
  380 ; FMF-NEXT:    xsrsqrtesp 0, 1
  381 ; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
  382 ; FMF-NEXT:    vspltisw 2, -3
  383 ; FMF-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
  384 ; FMF-NEXT:    xsmulsp 1, 1, 0
  385 ; FMF-NEXT:    xsmulsp 2, 1, 2
  386 ; FMF-NEXT:    xsmulsp 0, 1, 0
  387 ; FMF-NEXT:    xvcvsxwdp 1, 34
  388 ; FMF-NEXT:    xsaddsp 0, 0, 1
  389 ; FMF-NEXT:    xsmulsp 0, 2, 0
  390 ; FMF-NEXT:  .LBB13_2:
  391 ; FMF-NEXT:    fmr 1, 0
  392 ; FMF-NEXT:    blr
  393 ;
  394 ; GLOBAL-LABEL: sqrt_afn_preserve_sign:
  395 ; GLOBAL:       # %bb.0:
  396 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
  397 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
  398 ; GLOBAL-NEXT:    beq 0, .LBB13_2
  399 ; GLOBAL-NEXT:  # %bb.1:
  400 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  401 ; GLOBAL-NEXT:    vspltisw 2, -3
  402 ; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
  403 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
  404 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  405 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
  406 ; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
  407 ; GLOBAL-NEXT:    xsmulsp 0, 1, 0
  408 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  409 ; GLOBAL-NEXT:  .LBB13_2:
  410 ; GLOBAL-NEXT:    fmr 1, 0
  411 ; GLOBAL-NEXT:    blr
  412   %rt = call afn ninf float @llvm.sqrt.f32(float %x) ; both runs expect an xsrsqrtesp-based estimate sequence, guarded by a compare of x against zero
  413   ret float %rt
  414 }
 415
  416 define float @sqrt_afn_preserve_sign_inf(float %x) #1 { ; 'afn' sqrt without 'ninf' under attribute group #1
  417 ; FMF-LABEL: sqrt_afn_preserve_sign_inf:
  418 ; FMF:       # %bb.0:
  419 ; FMF-NEXT:    xssqrtsp 1, 1
  420 ; FMF-NEXT:    blr
  421 ;
  422 ; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
  423 ; GLOBAL:       # %bb.0:
  424 ; GLOBAL-NEXT:    xssqrtsp 1, 1
  425 ; GLOBAL-NEXT:    blr
  426   %rt = call afn float @llvm.sqrt.f32(float %x) ; 'afn' alone: both runs expect the plain xssqrtsp instruction
  427   ret float %rt
  428 }
 429
  430 ; The call now also carries 'contract' and 'reassoc' (in addition to 'afn ninf'), so the estimate sequence may use a fused multiply-add.
 431
 432 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
 433 ; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
 434 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
 435
 436 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
 437 ; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
 438 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
 439
  440 define float @sqrt_fast_ieee(float %x) #0 { ; 'contract reassoc afn ninf' sqrt under attribute group #0 ("denormal-fp-math"="ieee,ieee")
  441 ; FMF-LABEL: sqrt_fast_ieee:
  442 ; FMF:       # %bb.0:
  443 ; FMF-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
  444 ; FMF-NEXT:    xsabsdp 0, 1
  445 ; FMF-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
  446 ; FMF-NEXT:    fcmpu 0, 0, 2
  447 ; FMF-NEXT:    xxlxor 0, 0, 0
  448 ; FMF-NEXT:    blt 0, .LBB15_2
  449 ; FMF-NEXT:  # %bb.1:
  450 ; FMF-NEXT:    xsrsqrtesp 0, 1
  451 ; FMF-NEXT:    vspltisw 2, -3
  452 ; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
  453 ; FMF-NEXT:    xvcvsxwdp 2, 34
  454 ; FMF-NEXT:    xsmulsp 1, 1, 0
  455 ; FMF-NEXT:    xsmaddasp 2, 1, 0
  456 ; FMF-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
  457 ; FMF-NEXT:    xsmulsp 0, 1, 0
  458 ; FMF-NEXT:    xsmulsp 0, 0, 2
  459 ; FMF-NEXT:  .LBB15_2:
  460 ; FMF-NEXT:    fmr 1, 0
  461 ; FMF-NEXT:    blr
  462 ;
  463 ; GLOBAL-LABEL: sqrt_fast_ieee:
  464 ; GLOBAL:       # %bb.0:
  465 ; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
  466 ; GLOBAL-NEXT:    xsabsdp 0, 1
  467 ; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
  468 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
  469 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
  470 ; GLOBAL-NEXT:    blt 0, .LBB15_2
  471 ; GLOBAL-NEXT:  # %bb.1:
  472 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  473 ; GLOBAL-NEXT:    vspltisw 2, -3
  474 ; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
  475 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
  476 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  477 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
  478 ; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
  479 ; GLOBAL-NEXT:    xsmulsp 0, 1, 0
  480 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  481 ; GLOBAL-NEXT:  .LBB15_2:
  482 ; GLOBAL-NEXT:    fmr 1, 0
  483 ; GLOBAL-NEXT:    blr
  484   %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ; both runs expect the same estimate sequence, including a fused xsmaddasp
  485   ret float %rt
  486 }
 487
  488 ; The call now also carries 'contract' and 'reassoc' (in addition to 'afn ninf'), so the estimate sequence may use a fused multiply-add.
 489
 490 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 491 ; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
 492 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 493
 494 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 495 ; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
 496 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 497
  498 define float @sqrt_fast_preserve_sign(float %x) #1 { ; 'contract reassoc ninf afn' sqrt under attribute group #1 ("denormal-fp-math"="preserve-sign,preserve-sign")
  499 ; FMF-LABEL: sqrt_fast_preserve_sign:
  500 ; FMF:       # %bb.0:
  501 ; FMF-NEXT:    xxlxor 0, 0, 0
  502 ; FMF-NEXT:    fcmpu 0, 1, 0
  503 ; FMF-NEXT:    beq 0, .LBB16_2
  504 ; FMF-NEXT:  # %bb.1:
  505 ; FMF-NEXT:    xsrsqrtesp 0, 1
  506 ; FMF-NEXT:    vspltisw 2, -3
  507 ; FMF-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
  508 ; FMF-NEXT:    xvcvsxwdp 2, 34
  509 ; FMF-NEXT:    xsmulsp 1, 1, 0
  510 ; FMF-NEXT:    xsmaddasp 2, 1, 0
  511 ; FMF-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
  512 ; FMF-NEXT:    xsmulsp 0, 1, 0
  513 ; FMF-NEXT:    xsmulsp 0, 0, 2
  514 ; FMF-NEXT:  .LBB16_2:
  515 ; FMF-NEXT:    fmr 1, 0
  516 ; FMF-NEXT:    blr
  517 ;
  518 ; GLOBAL-LABEL: sqrt_fast_preserve_sign:
  519 ; GLOBAL:       # %bb.0:
  520 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
  521 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
  522 ; GLOBAL-NEXT:    beq 0, .LBB16_2
  523 ; GLOBAL-NEXT:  # %bb.1:
  524 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  525 ; GLOBAL-NEXT:    vspltisw 2, -3
  526 ; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
  527 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
  528 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
  529 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
  530 ; GLOBAL-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
  531 ; GLOBAL-NEXT:    xsmulsp 0, 1, 0
  532 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  533 ; GLOBAL-NEXT:  .LBB16_2:
  534 ; GLOBAL-NEXT:    fmr 1, 0
  535 ; GLOBAL-NEXT:    blr
  536   %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ; both runs expect the same estimate sequence, including a fused xsmaddasp
  537   ret float %rt
  538 }
 539
 540 ; fcmp can have fast-math-flags.
 541
 542 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
 543 ; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
 544 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
 545
 546 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
 547 ; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
 548 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
 549
  550 define double @fcmp_nnan(double %a, double %y, double %z) { ; select between %z and %y on an fcmp that carries 'nnan'
  551 ; FMF-LABEL: fcmp_nnan:
  552 ; FMF:       # %bb.0:
  553 ; FMF-NEXT:    xxlxor 0, 0, 0
  554 ; FMF-NEXT:    xscmpudp 0, 1, 0
  555 ; FMF-NEXT:    blt 0, .LBB17_2
  556 ; FMF-NEXT:  # %bb.1:
  557 ; FMF-NEXT:    fmr 3, 2
  558 ; FMF-NEXT:  .LBB17_2:
  559 ; FMF-NEXT:    fmr 1, 3
  560 ; FMF-NEXT:    blr
  561 ;
  562 ; GLOBAL-LABEL: fcmp_nnan:
  563 ; GLOBAL:       # %bb.0:
  564 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
  565 ; GLOBAL-NEXT:    xscmpudp 0, 1, 0
  566 ; GLOBAL-NEXT:    blt 0, .LBB17_2
  567 ; GLOBAL-NEXT:  # %bb.1:
  568 ; GLOBAL-NEXT:    fmr 3, 2
  569 ; GLOBAL-NEXT:  .LBB17_2:
  570 ; GLOBAL-NEXT:    fmr 1, 3
  571 ; GLOBAL-NEXT:    blr
  572   %cmp = fcmp nnan ult double %a, 0.0 ; 'ult' compare against zero; both runs expect a single blt after the compare
  573   %z.y = select i1 %cmp, double %z, double %y ; %z when the compare is true, otherwise %y
  574   ret double %z.y
  575 }
 576
 577 ; FP library calls can have fast-math-flags.
 578
 579 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
 580 ; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
 581 ; FMFDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
 582 ; FMFDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
 583 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
 584
 585 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
 586 ; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
 587 ; GLOBALDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
 588 ; GLOBALDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
 589 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
 590
  591 declare double @log2(double) ; external function called by log2_approx
  592 define double @log2_approx(double %x) nounwind { ; calls @log2 with the 'afn' flag on the call
  593 ; FMF-LABEL: log2_approx:
  594 ; FMF:       # %bb.0:
  595 ; FMF-NEXT:    mflr 0
  596 ; FMF-NEXT:    stdu 1, -32(1)
  597 ; FMF-NEXT:    std 0, 48(1)
  598 ; FMF-NEXT:    bl log2
  599 ; FMF-NEXT:    nop
  600 ; FMF-NEXT:    addi 1, 1, 32
  601 ; FMF-NEXT:    ld 0, 16(1)
  602 ; FMF-NEXT:    mtlr 0
  603 ; FMF-NEXT:    blr
  604 ;
  605 ; GLOBAL-LABEL: log2_approx:
  606 ; GLOBAL:       # %bb.0:
  607 ; GLOBAL-NEXT:    mflr 0
  608 ; GLOBAL-NEXT:    stdu 1, -32(1)
  609 ; GLOBAL-NEXT:    std 0, 48(1)
  610 ; GLOBAL-NEXT:    bl log2
  611 ; GLOBAL-NEXT:    nop
  612 ; GLOBAL-NEXT:    addi 1, 1, 32
  613 ; GLOBAL-NEXT:    ld 0, 16(1)
  614 ; GLOBAL-NEXT:    mtlr 0
  615 ; GLOBAL-NEXT:    blr
  616   %r = call afn double @log2(double %x) ; both runs expect an ordinary 'bl log2' call sequence
  617   ret double %r
  618 }
 619
 620 ; -(X - Y) --> (Y - X)
 621
 622 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
 623 ; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
 624 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
 625
 626 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
 627 ; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
 628 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
 629
  630 define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) { ; 0.0 - (x - y) with 'nsz' on the outer fsub; %z is unused
  631 ; FMF-LABEL: fneg_fsub_nozeros_1:
  632 ; FMF:       # %bb.0:
  633 ; FMF-NEXT:    xssubsp 1, 2, 1
  634 ; FMF-NEXT:    blr
  635 ;
  636 ; GLOBAL-LABEL: fneg_fsub_nozeros_1:
  637 ; GLOBAL:       # %bb.0:
  638 ; GLOBAL-NEXT:    xssubsp 1, 2, 1
  639 ; GLOBAL-NEXT:    blr
  640   %neg = fsub float %x, %y ; the inner x - y (despite the name, this is the subtraction, not the negation)
  641   %add = fsub nsz float 0.0, %neg ; the negation, written as 0.0 - %neg; both runs expect a single subtract with swapped operands
  642   ret float %add
  643 }
 644
  645 attributes #0 = { "denormal-fp-math"="ieee,ieee" } ; used by sqrt_afn_ieee, sqrt_afn_ieee_inf and sqrt_fast_ieee
  646 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" } ; used by the sqrt_*_preserve_sign* tests
 647 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 648 ; FMFDEBUG: {{.*}}
 649 ; GLOBALDEBUG: {{.*}}