llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll

   1 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
   2 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
   3 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
   4 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK,CHECK-PWR10 %s
   5 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
   6 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
   7 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK,CHECK-PWR7 %s
     ;
     ; Each RUN line enables the shared CHECK prefix next to its CPU-specific one.
     ; FileCheck only evaluates directives whose prefix is listed, so without CHECK
     ; the CHECK-LABEL / CHECK / CHECK-NOT lines in this file would be skipped and
     ; the CHECK-PWR<N> matches would not be anchored to a particular function.
     ; NOTE(review): little-endian pwr10 is checked against the _P9 entry while AIX
     ; pwr10 expects _P10 -- presumably intentional; confirm against the MASSV
     ; lowering pass before changing either line.
   8
   9 ; vspow_var: neither __powf4 operand is a constant.
     ; Both vector operands are loaded from memory (%x for the first argument, %y
     ; for the second) and the result is stored to %z. Each CPU-specific prefix
     ; expects the matching __powf4_P<N> symbol in the llc output.
  10 define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x)  {
  11 ; CHECK-LABEL:       @vspow_var
  12 ; CHECK-PWR10:       __powf4_P10
  13 ; CHECK-PWR9:        __powf4_P9
  14 ; CHECK-PWR8:        __powf4_P8
  15 ; CHECK-PWR7:        __powf4_P7
  16 ; CHECK:             blr
  17 entry:
  18   br label %vector.body
  19
  20 vector.body:
  21   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
  22   %z.elt = getelementptr float, float* %z, i64 %i
  23   %y.elt = getelementptr float, float* %y, i64 %i
  24   %x.elt = getelementptr float, float* %x, i64 %i
  25   %x.vptr = bitcast float* %x.elt to <4 x float>*
  26   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
  27   %y.vptr = bitcast float* %y.elt to <4 x float>*
  28   %y.vec = load <4 x float>, <4 x float>* %y.vptr, align 4
  29   %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> %y.vec)
  30   %z.vptr = bitcast float* %z.elt to <4 x float>*
  31   store <4 x float> %pow, <4 x float>* %z.vptr, align 4
  32   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
  33   %done = icmp eq i64 %i.next, 1024
  34   br i1 %done, label %for.end, label %vector.body
  35
  36 for.end:
  37   ret void
  38 }
  39
  40 ; vspow_const: second operand is a splat constant that is neither 0.75 nor 0.25.
     ; All four lanes hold 0x3FE851EB80000000 (roughly 0.76). Each CPU-specific
     ; prefix expects the matching __powf4_P<N> symbol in the llc output.
  41 define void @vspow_const(float* nocapture %y, float* nocapture readonly %x)  {
  42 ; CHECK-LABEL:       @vspow_const
  43 ; CHECK-PWR10:       __powf4_P10
  44 ; CHECK-PWR9:        __powf4_P9
  45 ; CHECK-PWR8:        __powf4_P8
  46 ; CHECK-PWR7:        __powf4_P7
  47 ; CHECK:             blr
  48 entry:
  49   br label %vector.body
  50
  51 vector.body:
  52   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
  53   %y.elt = getelementptr float, float* %y, i64 %i
  54   %x.elt = getelementptr float, float* %x, i64 %i
  55   %x.vptr = bitcast float* %x.elt to <4 x float>*
  56   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
  57   %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  58   %y.vptr = bitcast float* %y.elt to <4 x float>*
  59   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
  60   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
  61   %done = icmp eq i64 %i.next, 1024
  62   br i1 %done, label %for.end, label %vector.body
  63
  64 for.end:
  65   ret void
  66 }
  67
  68 ; vspow_neq_const: second operand is a non-uniform constant vector with no 0.75 or 0.25 lane.
     ; Lanes 0 and 1 differ from each other and from lanes 2 and 3. Each
     ; CPU-specific prefix expects the matching __powf4_P<N> symbol in the output.
  69 define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x)  {
  70 ; CHECK-LABEL:       @vspow_neq_const
  71 ; CHECK-PWR10:       __powf4_P10
  72 ; CHECK-PWR9:        __powf4_P9
  73 ; CHECK-PWR8:        __powf4_P8
  74 ; CHECK-PWR7:        __powf4_P7
  75 ; CHECK:             blr
  76 entry:
  77   br label %vector.body
  78
  79 vector.body:
  80   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
  81   %y.elt = getelementptr float, float* %y, i64 %i
  82   %x.elt = getelementptr float, float* %x, i64 %i
  83   %x.vptr = bitcast float* %x.elt to <4 x float>*
  84   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
  85   %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  86   %y.vptr = bitcast float* %y.elt to <4 x float>*
  87   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
  88   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
  89   %done = icmp eq i64 %i.next, 1024
  90   br i1 %done, label %for.end, label %vector.body
  91
  92 for.end:
  93   ret void
  94 }
  95
  96 ; vspow_neq075_const: second operand is 0.75 in three lanes only, so it is not a 0.75 splat.
     ; The last lane holds 0x3FE851EB80000000 instead. Each CPU-specific prefix
     ; expects the matching __powf4_P<N> symbol in the llc output.
  97 define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x)  {
  98 ; CHECK-LABEL:       @vspow_neq075_const
  99 ; CHECK-PWR10:       __powf4_P10
 100 ; CHECK-PWR9:        __powf4_P9
 101 ; CHECK-PWR8:        __powf4_P8
 102 ; CHECK-PWR7:        __powf4_P7
 103 ; CHECK:             blr
 104 entry:
 105   br label %vector.body
 106
 107 vector.body:
 108   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
 109   %y.elt = getelementptr float, float* %y, i64 %i
 110   %x.elt = getelementptr float, float* %x, i64 %i
 111   %x.vptr = bitcast float* %x.elt to <4 x float>*
 112   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
 113   %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
 114   %y.vptr = bitcast float* %y.elt to <4 x float>*
 115   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
 116   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
 117   %done = icmp eq i64 %i.next, 1024
 118   br i1 %done, label %for.end, label %vector.body
 119
 120 for.end:
 121   ret void
 122 }
 123
 124 ; vspow_neq025_const: second operand is 0.25 in lanes 1 and 3 only, so it is not a 0.25 splat.
     ; Lanes 0 and 2 hold 0x3FE851EB80000000 instead. Each CPU-specific prefix
     ; expects the matching __powf4_P<N> symbol in the llc output.
 125 define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x)  {
 126 ; CHECK-LABEL:       @vspow_neq025_const
 127 ; CHECK-PWR10:       __powf4_P10
 128 ; CHECK-PWR9:        __powf4_P9
 129 ; CHECK-PWR8:        __powf4_P8
 130 ; CHECK-PWR7:        __powf4_P7
 131 ; CHECK:             blr
 132 entry:
 133   br label %vector.body
 134
 135 vector.body:
 136   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
 137   %y.elt = getelementptr float, float* %y, i64 %i
 138   %x.elt = getelementptr float, float* %x, i64 %i
 139   %x.vptr = bitcast float* %x.elt to <4 x float>*
 140   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
 141   %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
 142   %y.vptr = bitcast float* %y.elt to <4 x float>*
 143   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
 144   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
 145   %done = icmp eq i64 %i.next, 1024
 146   br i1 %done, label %for.end, label %vector.body
 147
 148 for.end:
 149   ret void
 150 }
 151
 152 ; vspow_075: second operand is a splat of 0.75 and the call carries ninf and afn (no nsz).
     ; Expected output: no CPU-suffixed __powf4_P<N> symbol appears before an
     ; xvrsqrtesp instruction. The suffix is matched with [0-9]+ so that every
     ; CPU level is rejected by a single pattern.
     ; NOTE(review): the directives below use the plain CHECK prefix, which only
     ; takes effect when the RUN lines list CHECK in -check-prefixes.
 153 define void @vspow_075(float* nocapture %y, float* nocapture readonly %x)  {
 154 ; CHECK-LABEL:       @vspow_075
 155 ; CHECK-NOT:         __powf4_P{{[0-9]+}}
 156 ; CHECK:             xvrsqrtesp
 157 ; CHECK:             blr
 158 entry:
 159   br label %vector.body
 160
 161 vector.body:
 162   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
 163   %next.gep = getelementptr float, float* %y, i64 %index
 164   %next.gep19 = getelementptr float, float* %x, i64 %index
 165   %0 = bitcast float* %next.gep19 to <4 x float>*
 166   %wide.load = load <4 x float>, <4 x float>* %0, align 4
 167   %1 = call ninf afn <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
 168   %2 = bitcast float* %next.gep to <4 x float>*
 169   store <4 x float> %1, <4 x float>* %2, align 4
 170   %index.next = add i64 %index, 4
 171   %3 = icmp eq i64 %index.next, 1024
 172   br i1 %3, label %for.end, label %vector.body
 173
 174 for.end:
 175   ret void
 176 }
 177
 178 ; vspow_025: second operand is a splat of 0.25 and the call carries ninf, afn and nsz.
     ; Expected output: no CPU-suffixed __powf4_P<N> symbol appears before an
     ; xvrsqrtesp instruction. The suffix is matched with [0-9]+ so that every
     ; CPU level is rejected by a single pattern.
     ; NOTE(review): the directives below use the plain CHECK prefix, which only
     ; takes effect when the RUN lines list CHECK in -check-prefixes.
 179 define void @vspow_025(float* nocapture %y, float* nocapture readonly %x)  {
 180 ; CHECK-LABEL:       @vspow_025
 181 ; CHECK-NOT:         __powf4_P{{[0-9]+}}
 182 ; CHECK:             xvrsqrtesp
 183 ; CHECK:             blr
 184 entry:
 185   br label %vector.body
 186
 187 vector.body:
 188   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
 189   %next.gep = getelementptr float, float* %y, i64 %index
 190   %next.gep19 = getelementptr float, float* %x, i64 %index
 191   %0 = bitcast float* %next.gep19 to <4 x float>*
 192   %wide.load = load <4 x float>, <4 x float>* %0, align 4
 193   %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
 194   %2 = bitcast float* %next.gep to <4 x float>*
 195   store <4 x float> %1, <4 x float>* %2, align 4
 196   %index.next = add i64 %index, 4
 197   %3 = icmp eq i64 %index.next, 1024
 198   br i1 %3, label %for.end, label %vector.body
 199
 200 for.end:
 201   ret void
 202 }
 203
 204 ; vspow_075_nofast: second operand is a splat of 0.75 but the call has no fast-math flags.
     ; Each CPU-specific prefix expects the matching __powf4_P<N> symbol, and
     ; no xvrsqrtesp may follow it before the blr.
 205 define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x)  {
 206 ; CHECK-LABEL:       @vspow_075_nofast
 207 ; CHECK-PWR10:       __powf4_P10
 208 ; CHECK-PWR9:        __powf4_P9
 209 ; CHECK-PWR8:        __powf4_P8
 210 ; CHECK-PWR7:        __powf4_P7
 211 ; CHECK-NOT:         xvrsqrtesp
 212 ; CHECK:             blr
 213 entry:
 214   br label %vector.body
 215
 216 vector.body:
 217   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
 218   %y.elt = getelementptr float, float* %y, i64 %i
 219   %x.elt = getelementptr float, float* %x, i64 %i
 220   %x.vptr = bitcast float* %x.elt to <4 x float>*
 221   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
 222   %pow = call <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
 223   %y.vptr = bitcast float* %y.elt to <4 x float>*
 224   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
 225   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
 226   %done = icmp eq i64 %i.next, 1024
 227   br i1 %done, label %for.end, label %vector.body
 228
 229 for.end:
 230   ret void
 231 }
 232
 233 ; vspow_025_nofast: second operand is a splat of 0.25 but the call has no fast-math flags.
     ; Each CPU-specific prefix expects the matching __powf4_P<N> symbol, and
     ; no xvrsqrtesp may follow it before the blr.
 234 define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x)  {
 235 ; CHECK-LABEL:       @vspow_025_nofast
 236 ; CHECK-PWR10:       __powf4_P10
 237 ; CHECK-PWR9:        __powf4_P9
 238 ; CHECK-PWR8:        __powf4_P8
 239 ; CHECK-PWR7:        __powf4_P7
 240 ; CHECK-NOT:         xvrsqrtesp
 241 ; CHECK:             blr
 242 entry:
 243   br label %vector.body
 244
 245 vector.body:
 246   %i = phi i64 [ %i.next, %vector.body ], [ 0, %entry ]
 247   %y.elt = getelementptr float, float* %y, i64 %i
 248   %x.elt = getelementptr float, float* %x, i64 %i
 249   %x.vptr = bitcast float* %x.elt to <4 x float>*
 250   %x.vec = load <4 x float>, <4 x float>* %x.vptr, align 4
 251   %pow = call <4 x float> @__powf4(<4 x float> %x.vec, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
 252   %y.vptr = bitcast float* %y.elt to <4 x float>*
 253   store <4 x float> %pow, <4 x float>* %y.vptr, align 4
 254   %i.next = add i64 %i, 4                ; one <4 x float> per iteration
 255   %done = icmp eq i64 %i.next, 1024
 256   br i1 %done, label %for.end, label %vector.body
 257
 258 for.end:
 259   ret void
 260 }
 261
 262 ; External <4 x float> pow entry point called by every test function in this file; this declaration carries no function attributes.
 263 declare <4 x float> @__powf4(<4 x float>, <4 x float>)