mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir

   1 // RUN:   mlir-opt %s -test-math-polynomial-approximation="enable-avx2"        \
   2 // RUN:               -convert-vector-to-scf                                   \
   3 // RUN:               -convert-scf-to-cf                                       \
   4 // RUN:               -convert-arith-to-llvm                                   \
   5 // RUN:               -convert-vector-to-llvm="enable-x86vector"               \
   6 // RUN:               -convert-math-to-llvm                                    \
   7 // RUN:               -convert-func-to-llvm                                    \
   8 // RUN:               -reconcile-unrealized-casts                              \
   9 // RUN: | mlir-cpu-runner                                                      \
  10 // RUN:     -e main -entry-point-result=void -O0                               \
  11 // RUN:     -shared-libs=%mlir_c_runner_utils  \
  12 // RUN:     -shared-libs=%mlir_runner_utils    \
  13 // RUN: | FileCheck %s
  14
  15 // -------------------------------------------------------------------------- //
  16 // rsqrt.
  17 // -------------------------------------------------------------------------- //
  18
  19 func.func @rsqrt() {
  20   // Scalar path first: rsqrt(0.0) must print inf and rsqrt(2.0) must print 0.707107.
  21   %zero = arith.constant 0.0 : f32
  22   %inv_sqrt_zero = math.rsqrt %zero : f32
  23   vector.print %inv_sqrt_zero : f32
  24   // CHECK: inf
  25   %c2 = arith.constant 2.0 : f32
  26   %inv_sqrt_c2 = math.rsqrt %c2 : f32
  27   vector.print %inv_sqrt_c2 : f32
  28   // CHECK: 0.707107
  29
  30   // 8-lane vector path: every lane of rsqrt(splat 2.0) should print 0.707107.
  31   %splat2 = arith.constant dense<2.0> : vector<8xf32>
  32   %inv_sqrt_splat2 = math.rsqrt %splat2 : vector<8xf32>
  33   vector.print %inv_sqrt_splat2 : vector<8xf32>
  34   // CHECK: 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107
  35
  36   return
  37 }
  38
  39 func.func @main() {  // Entry point named by `-e main`; returns nothing.
  40   func.call @rsqrt() : () -> ()
  41   return
  42 }