test/CodeGen/X86/sqrt-partial.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
   4
   5 ; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
   6 ; We have to assume that errno can be set, so we have to make a libcall in that case.
   7 ; But it's better for perf to check that the argument is valid rather than the result of
   8 ; sqrtss/sqrtsd.
   9 ; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
  10 ; for that), but we're checking the final asm to make sure that comes out as expected too.
  11
  12 define float @f(float %val) nounwind { ; float case: returns the result of a tail call to sqrtf on %val
  13 ; SSE-LABEL: f:
  14 ; SSE:       # %bb.0:
  15 ; SSE-NEXT:    xorps %xmm1, %xmm1
  16 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
  17 ; SSE-NEXT:    jb .LBB0_2
  18 ; SSE-NEXT:  # %bb.1: # %.split
  19 ; SSE-NEXT:    sqrtss %xmm0, %xmm0
  20 ; SSE-NEXT:    retq
  21 ; SSE-NEXT:  .LBB0_2: # %call.sqrt
  22 ; SSE-NEXT:    jmp sqrtf # TAILCALL
  23 ;
  24 ; AVX-LABEL: f:
  25 ; AVX:       # %bb.0:
  26 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
  27 ; AVX-NEXT:    vucomiss %xmm1, %xmm0
  28 ; AVX-NEXT:    jb .LBB0_2
  29 ; AVX-NEXT:  # %bb.1: # %.split
  30 ; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
  31 ; AVX-NEXT:    retq
  32 ; AVX-NEXT:  .LBB0_2: # %call.sqrt
  33 ; AVX-NEXT:    jmp sqrtf # TAILCALL
  34   %res = tail call float @sqrtf(float %val) ; the checks above expect the argument to be compared against a zeroed register first: inline sqrt on the %.split path, sqrtf tail call only on the jb-taken %call.sqrt path
  35   ret float %res
  36 }
  37
  38 define double @d(double %val) nounwind { ; double case: returns the result of a tail call to sqrt on %val
  39 ; SSE-LABEL: d:
  40 ; SSE:       # %bb.0:
  41 ; SSE-NEXT:    xorps %xmm1, %xmm1
  42 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
  43 ; SSE-NEXT:    jb .LBB1_2
  44 ; SSE-NEXT:  # %bb.1: # %.split
  45 ; SSE-NEXT:    sqrtsd %xmm0, %xmm0
  46 ; SSE-NEXT:    retq
  47 ; SSE-NEXT:  .LBB1_2: # %call.sqrt
  48 ; SSE-NEXT:    jmp sqrt # TAILCALL
  49 ;
  50 ; AVX-LABEL: d:
  51 ; AVX:       # %bb.0:
  52 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
  53 ; AVX-NEXT:    vucomisd %xmm1, %xmm0
  54 ; AVX-NEXT:    jb .LBB1_2
  55 ; AVX-NEXT:  # %bb.1: # %.split
  56 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
  57 ; AVX-NEXT:    retq
  58 ; AVX-NEXT:  .LBB1_2: # %call.sqrt
  59 ; AVX-NEXT:    jmp sqrt # TAILCALL
  60   %res = tail call double @sqrt(double %val) ; the checks above expect the same shape as @f: compare the argument against a zeroed register, inline sqrt on %.split, sqrt tail call only on the jb-taken %call.sqrt path
  61   ret double %res
  62 }
  63
  64 define double @minsize(double %x, double %y) minsize { ; minsize function computing sqrt(x*x + y*y) with fast-math flags on every operation
  65 ; SSE-LABEL: minsize:
  66 ; SSE:       # %bb.0:
  67 ; SSE-NEXT:    mulsd %xmm0, %xmm0
  68 ; SSE-NEXT:    mulsd %xmm1, %xmm1
  69 ; SSE-NEXT:    addsd %xmm0, %xmm1
  70 ; SSE-NEXT:    sqrtsd %xmm1, %xmm0
  71 ; SSE-NEXT:    retq
  72 ;
  73 ; AVX-LABEL: minsize:
  74 ; AVX:       # %bb.0:
  75 ; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
  76 ; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
  77 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
  78 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
  79 ; AVX-NEXT:    retq
  80   %t3 = fmul fast double %x, %x ; x squared
  81   %t4 = fmul fast double %y, %y ; y squared
  82   %t5 = fadd fast double %t3, %t4 ; sum of squares
  83   %t6 = tail call fast double @llvm.sqrt.f64(double %t5) ; intrinsic rather than the sqrt libcall; the checks above expect one sqrt instruction with no compare, branch, or call
  84   ret double %t6
  85 }
  86
  87 ; Avoiding a partial register dependency may be handled by register allocation
  88 ; (the choice of operand registers) rather than by adding an extra instruction.
  89
  90 define double @partial_dep_minsize(double %x, double %y) minsize { ; minsize function computing sqrt(y) + y; %x is never used in the body
  91 ; SSE-LABEL: partial_dep_minsize:
  92 ; SSE:       # %bb.0:
  93 ; SSE-NEXT:    sqrtsd %xmm1, %xmm0
  94 ; SSE-NEXT:    addsd %xmm1, %xmm0
  95 ; SSE-NEXT:    retq
  96 ;
  97 ; AVX-LABEL: partial_dep_minsize:
  98 ; AVX:       # %bb.0:
  99 ; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
 100 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 101 ; AVX-NEXT:    retq
 102   %t6 = tail call fast double @llvm.sqrt.f64(double %y) ; the checks above expect the sqrt to read %xmm1 and write %xmm0 as the first instruction, with nothing emitted before it
 103   %t = fadd fast double %t6, %y ; %y is used again here, so its register must still hold it after the sqrt
 104   ret double %t
 105 }
 106
 107 declare float @sqrtf(float) ; external callee of @f
 108 declare double @sqrt(double) ; external callee of @d
 109 declare double @llvm.sqrt.f64(double) ; intrinsic called by @minsize and @partial_dep_minsize