llvm/test/Transforms/SLPVectorizer/X86/call.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=slp-vectorizer,dce -slp-threshold=-999 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
   3
   4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
   5 target triple = "x86_64-apple-macosx10.8.0"
   6
     ; External functions called by the tests below. All are declared
     ; nounwind willreturn. Note that @round is declared here with an i64
     ; parameter and i64 result, unlike the other declarations, which use double.
   7 declare double @sin(double) nounwind willreturn
   8 declare double @cos(double) nounwind willreturn
   9 declare double @tan(double) nounwind willreturn
  10 declare double @pow(double, double) nounwind willreturn
  11 declare double @exp2(double) nounwind willreturn
  12 declare double @sqrt(double) nounwind willreturn
  13 declare i64 @round(i64) nounwind willreturn
  14
  15
  16 define void @sin_libm(ptr %a, ptr %b) {
     ; Input: two scalar @sin calls (nounwind readnone) on the adjacent doubles
     ; at %a[0] and %a[1], with results stored to %b[0] and %b[1].
     ; The CHECK lines expect one <2 x double> load, one @llvm.sin.v2f64 call
     ; and one <2 x double> store.
  17 ; CHECK-LABEL: @sin_libm(
  18 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
  19 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
  20 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
  21 ; CHECK-NEXT:    ret void
  22 ;
  23   %a0 = load double, ptr %a, align 8
  24   %idx1 = getelementptr inbounds double, ptr %a, i64 1
  25   %a1 = load double, ptr %idx1, align 8
  26   %sin1 = tail call double @sin(double %a0) nounwind readnone
  27   %sin2 = tail call double @sin(double %a1) nounwind readnone
  28   store double %sin1, ptr %b, align 8
  29   %idx2 = getelementptr inbounds double, ptr %b, i64 1
  30   store double %sin2, ptr %idx2, align 8
  31   ret void
  32 }
  33
  34 define void @cos_libm(ptr %a, ptr %b) {
     ; Input: two scalar @cos calls (nounwind readnone) on the adjacent doubles
     ; at %a[0] and %a[1], with results stored to %b[0] and %b[1].
     ; The CHECK lines expect one <2 x double> load, one @llvm.cos.v2f64 call
     ; and one <2 x double> store.
  35 ; CHECK-LABEL: @cos_libm(
  36 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
  37 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
  38 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
  39 ; CHECK-NEXT:    ret void
  40 ;
  41   %a0 = load double, ptr %a, align 8
  42   %idx1 = getelementptr inbounds double, ptr %a, i64 1
  43   %a1 = load double, ptr %idx1, align 8
  44   %cos1 = tail call double @cos(double %a0) nounwind readnone
  45   %cos2 = tail call double @cos(double %a1) nounwind readnone
  46   store double %cos1, ptr %b, align 8
  47   %idx2 = getelementptr inbounds double, ptr %b, i64 1
  48   store double %cos2, ptr %idx2, align 8
  49   ret void
  50 }
  51
  52 define void @tan_libm(ptr %a, ptr %b) {
     ; Input: two scalar @tan calls (nounwind readnone) on the adjacent doubles
     ; at %a[0] and %a[1], with results stored to %b[0] and %b[1].
     ; The CHECK lines expect one <2 x double> load, one @llvm.tan.v2f64 call
     ; and one <2 x double> store.
  53 ; CHECK-LABEL: @tan_libm(
  54 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
  55 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
  56 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
  57 ; CHECK-NEXT:    ret void
  58 ;
  59   %a0 = load double, ptr %a, align 8
  60   %idx1 = getelementptr inbounds double, ptr %a, i64 1
  61   %a1 = load double, ptr %idx1, align 8
  62   %tan1 = tail call double @tan(double %a0) nounwind readnone
  63   %tan2 = tail call double @tan(double %a1) nounwind readnone
  64   store double %tan1, ptr %b, align 8
  65   %idx2 = getelementptr inbounds double, ptr %b, i64 1
  66   store double %tan2, ptr %idx2, align 8
  67   ret void
  68 }
  69
  70 define void @pow_libm(ptr %a, ptr %b) {
     ; Input: two scalar @pow calls (nounwind readnone) on the adjacent doubles
     ; at %a[0] and %a[1]; each call passes the same loaded value as both
     ; arguments. Results are stored to %b[0] and %b[1].
     ; The CHECK lines expect one <2 x double> load, one @llvm.pow.v2f64 call
     ; using that loaded vector for both operands, and one <2 x double> store.
  71 ; CHECK-LABEL: @pow_libm(
  72 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
  73 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
  74 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
  75 ; CHECK-NEXT:    ret void
  76 ;
  77   %a0 = load double, ptr %a, align 8
  78   %idx1 = getelementptr inbounds double, ptr %a, i64 1
  79   %a1 = load double, ptr %idx1, align 8
  80   %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
  81   %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
  82   store double %pow1, ptr %b, align 8
  83   %idx2 = getelementptr inbounds double, ptr %b, i64 1
  84   store double %pow2, ptr %idx2, align 8
  85   ret void
  86 }
  87
  88 define void @exp_libm(ptr %a, ptr %b) {
     ; Input: two scalar @exp2 calls (nounwind readnone) on the adjacent doubles
     ; at %a[0] and %a[1], with results stored to %b[0] and %b[1]. Despite the
     ; function name, the callee is @exp2, and the second result is the SSA
     ; value named %exp2.
     ; The CHECK lines expect one <2 x double> load, one @llvm.exp2.v2f64 call
     ; and one <2 x double> store.
  89 ; CHECK-LABEL: @exp_libm(
  90 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
  91 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
  92 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
  93 ; CHECK-NEXT:    ret void
  94 ;
  95   %a0 = load double, ptr %a, align 8
  96   %idx1 = getelementptr inbounds double, ptr %a, i64 1
  97   %a1 = load double, ptr %idx1, align 8
  98   %exp1 = tail call double @exp2(double %a0) nounwind readnone
  99   %exp2 = tail call double @exp2(double %a1) nounwind readnone
 100   store double %exp1, ptr %b, align 8
 101   %idx2 = getelementptr inbounds double, ptr %b, i64 1
 102   store double %exp2, ptr %idx2, align 8
 103   ret void
 104 }
 105
 106 ; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
 107 ; We just need to know that errno is not set (readnone).
 108
 109 define void @sqrt_libm_no_errno(ptr %a, ptr %b) {
     ; Input: two scalar @sqrt calls marked nounwind readnone, with no
     ; fast-math flags, on the adjacent doubles at %a[0] and %a[1]; results are
     ; stored to %b[0] and %b[1].
     ; The CHECK lines expect one <2 x double> load, one @llvm.sqrt.v2f64 call
     ; and one <2 x double> store.
 110 ; CHECK-LABEL: @sqrt_libm_no_errno(
 111 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 112 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
 113 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 114 ; CHECK-NEXT:    ret void
 115 ;
 116   %a0 = load double, ptr %a, align 8
 117   %idx1 = getelementptr inbounds double, ptr %a, i64 1
 118   %a1 = load double, ptr %idx1, align 8
 119   %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
 120   %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
 121   store double %sqrt1, ptr %b, align 8
 122   %idx2 = getelementptr inbounds double, ptr %b, i64 1
 123   store double %sqrt2, ptr %idx2, align 8
 124   ret void
 125 }
 126
 127 ; The sqrt intrinsic does not set errno, but a sqrt library call that is not marked readnone might, so this can't vectorize.
 128 ; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
 129 ; input would result in a nan output ("On a domain error, the function returns an
 130 ; implementation-defined value.")
 131
 132 define void @sqrt_libm_errno(ptr %a, ptr %b) {
     ; Input: two scalar @sqrt calls carrying the nnan flag and nounwind, but
     ; no readnone, on the adjacent doubles at %a[0] and %a[1].
     ; The CHECK lines expect the scalar loads, the two scalar @sqrt calls and
     ; the scalar stores to remain, in the same order as the input.
 133 ; CHECK-LABEL: @sqrt_libm_errno(
 134 ; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
 135 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
 136 ; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
 137 ; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
 138 ; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
 139 ; CHECK-NEXT:    store double [[SQRT1]], ptr [[B:%.*]], align 8
 140 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
 141 ; CHECK-NEXT:    store double [[SQRT2]], ptr [[IDX2]], align 8
 142 ; CHECK-NEXT:    ret void
 143 ;
 144   %a0 = load double, ptr %a, align 8
 145   %idx1 = getelementptr inbounds double, ptr %a, i64 1
 146   %a1 = load double, ptr %idx1, align 8
 147   %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
 148   %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
 149   store double %sqrt1, ptr %b, align 8
 150   %idx2 = getelementptr inbounds double, ptr %b, i64 1
 151   store double %sqrt2, ptr %idx2, align 8
 152   ret void
 153 }
 154
 155 ; Negative test case: @round is declared in this file as i64 (i64), so these calls are expected to stay scalar.
 156 define void @round_custom(ptr %a, ptr %b) {
     ; Input: two scalar @round calls (nounwind readnone) on the adjacent i64
     ; values at %a[0] and %a[1]; @round takes and returns i64 here.
     ; The CHECK lines expect the scalar loads, the two scalar @round calls and
     ; the scalar stores to remain, in the same order as the input.
 157 ; CHECK-LABEL: @round_custom(
 158 ; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr [[A:%.*]], align 8
 159 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
 160 ; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr [[IDX1]], align 8
 161 ; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
 162 ; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
 163 ; CHECK-NEXT:    store i64 [[ROUND1]], ptr [[B:%.*]], align 8
 164 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
 165 ; CHECK-NEXT:    store i64 [[ROUND2]], ptr [[IDX2]], align 8
 166 ; CHECK-NEXT:    ret void
 167 ;
 168   %a0 = load i64, ptr %a, align 8
 169   %idx1 = getelementptr inbounds i64, ptr %a, i64 1
 170   %a1 = load i64, ptr %idx1, align 8
 171   %round1 = tail call i64 @round(i64 %a0) nounwind readnone
 172   %round2 = tail call i64 @round(i64 %a1) nounwind readnone
 173   store i64 %round1, ptr %b, align 8
 174   %idx2 = getelementptr inbounds i64, ptr %b, i64 1
 175   store i64 %round2, ptr %idx2, align 8
 176   ret void
 177 }
 178
 179
 180
 181