mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir

   1 // RUN: mlir-opt %s \
   2 // RUN:   --sparsification --sparse-tensor-conversion \
   3 // RUN:   --linalg-bufferize --convert-linalg-to-loops \
   4 // RUN:   --convert-vector-to-scf --convert-scf-to-std \
   5 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
   6 // RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
   7 // RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-math-to-llvm \
   8 // RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
   9 // RUN: mlir-cpu-runner \
  10 // RUN:  -e entry -entry-point-result=void  \
  11 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
  12 // RUN: FileCheck %s
  13
  14 #DCSR = #sparse_tensor.encoding<{  // Sparse encoding alias used by the kernel signature and the driver's conversions.
  15   dimLevelType = [ "compressed", "compressed" ]  // Both tensor dimensions use the "compressed" level type.
  16 }>
  17
  18 #trait_mult_elt = {  // linalg.generic trait: identity maps on both inputs and the output.
  19   indexing_maps = [
  20     affine_map<(d0, d1) -> (d0, d1)>,  // input A
  21     affine_map<(d0, d1) -> (d0, d1)>,  // input B
  22     affine_map<(d0, d1) -> (d0, d1)>   // output X
  23   ],
  24   iterator_types = ["parallel", "parallel"],
  25   doc = "X(i,j) = A(i,j) * B(i,j)"
  26 }
  27
  28 module {
  29   // Kernel: element-wise product of two sparse 32x16 f32 tensors into a freshly initialized sparse output.
  30   func @sparse_mult_elt(
  31       %arga: tensor<32x16xf32, #DCSR>, %argb: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> {
  32     %rows = arith.constant 32 : index
  33     %cols = arith.constant 16 : index
  34     %out = sparse_tensor.init [%rows, %cols] : tensor<32x16xf32, #DCSR>
  35     %result = linalg.generic #trait_mult_elt
  36       ins(%arga, %argb: tensor<32x16xf32, #DCSR>, tensor<32x16xf32, #DCSR>)
  37       outs(%out: tensor<32x16xf32, #DCSR>) {
  38         ^body(%lhs: f32, %rhs: f32, %unused: f32):
  39           %prod = arith.mulf %lhs, %rhs : f32
  40           linalg.yield %prod : f32
  41     } -> tensor<32x16xf32, #DCSR>
  42     return %result : tensor<32x16xf32, #DCSR>
  43   }
  44
  45   // Test driver: builds two sparse operands, runs the kernel, and prints the stored result values.
  46   func @entry() {
  47     %pad = arith.constant -1.0 : f32
  48     %zero = arith.constant 0 : index
  49
  50     // Inputs as sparse constants: four nonzeros in A, six nonzeros in B.
  51     %dense_a = arith.constant sparse<
  52        [ [2,2], [15,15], [31,0], [31,14] ], [ 2.0, 3.0, -2.0, 4.0 ]
  53     > : tensor<32x16xf32>
  54     %dense_b = arith.constant sparse<
  55        [ [1,1], [2,0], [2,2], [2,15], [31,0], [31,15] ], [ 5.0, 6.0, 7.0, 8.0, -10.0, 9.0 ]
  56     > : tensor<32x16xf32>
  57     %sparse_a = sparse_tensor.convert %dense_a
  58       : tensor<32x16xf32> to tensor<32x16xf32, #DCSR>
  59     %sparse_b = sparse_tensor.convert %dense_b
  60       : tensor<32x16xf32> to tensor<32x16xf32, #DCSR>
  61
  62     // Invoke the kernel on the converted operands.
  63     %product = call @sparse_mult_elt(%sparse_a, %sparse_b)
  64       : (tensor<32x16xf32, #DCSR>,
  65          tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR>
  66
  67     //
  68     // Expected output: the inputs overlap only at [2,2] and [31,0]; the other two lanes are the -1 padding.
  69     //
  70     // CHECK: ( 14, 20, -1, -1 )
  71     //
  72     %values = sparse_tensor.values %product : tensor<32x16xf32, #DCSR> to memref<?xf32>
  73     %vec = vector.transfer_read %values[%zero], %pad: memref<?xf32>, vector<4xf32>
  74     vector.print %vec : vector<4xf32>
  75
  76     // Free the two converted inputs and the kernel result.
  77     sparse_tensor.release %sparse_a : tensor<32x16xf32, #DCSR>
  78     sparse_tensor.release %sparse_b : tensor<32x16xf32, #DCSR>
  79     sparse_tensor.release %product  : tensor<32x16xf32, #DCSR>
  80     return
  81   }
  82 }