mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir

   1 // RUN: mlir-opt %s \
   2 // RUN:   --sparsification --sparse-tensor-conversion \
   3 // RUN:   --convert-vector-to-scf --convert-scf-to-std \
   4 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
   5 // RUN:   --std-bufferize --finalizing-bufferize  \
   6 // RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \
   7 // RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
   8 // RUN: mlir-cpu-runner \
   9 // RUN:  -e entry -entry-point-result=void  \
  10 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
  11 // RUN: FileCheck %s
  12 //
  13 // Do the same run, but now with SIMDization as well. This should not change the outcome.
  14 //
  15 // RUN: mlir-opt %s \
  16 // RUN:   --sparsification="vectorization-strategy=2 vl=16 enable-simd-index32" --sparse-tensor-conversion \
  17 // RUN:   --convert-vector-to-scf --convert-scf-to-std \
  18 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
  19 // RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
  20 // RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \
  21 // RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
  22 // RUN: mlir-cpu-runner \
  23 // RUN:  -e entry -entry-point-result=void  \
  24 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
  25 // RUN: FileCheck %s
  26
  27 !Filename = type !llvm.ptr<i8>  // i8 pointer: result type of @getTensorFilename, operand type of sparse_tensor.new
  28 // Encoding for the matrix operand: first dimension "dense", second "compressed", with 8-bit pointer and index widths.
  29 #SparseMatrix = #sparse_tensor.encoding<{
  30   dimLevelType = [ "dense", "compressed" ],
  31   pointerBitWidth = 8,
  32   indexBitWidth = 8
  33 }>
  34 // Trait passed to linalg.generic in @kernel_matvec: i is a parallel loop, j is the reduction loop.
  35 #matvec = {
  36   indexing_maps = [
  37     affine_map<(i,j) -> (i,j)>, // A: first input, indexed by (i,j)
  38     affine_map<(i,j) -> (j)>,   // b: second input, indexed by j only
  39     affine_map<(i,j) -> (i)>    // x (out): output, indexed by i only
  40   ],
  41   iterator_types = ["parallel", "reduction"],
  42   doc = "X(i) += A(i,j) * B(j)"
  43 }
  44
  45 //
  46 // Integration test that lowers a kernel annotated as sparse to
  47 // actual sparse code, initializes a matching sparse storage scheme
  48 // from file, and runs the resulting code with the JIT compiler.
  49 //
  50 module {
  51   //
  52   // Multiplies the sparse matrix A by the dense vector b, accumulating
  53   // into the dense vector x:  x(i) += A(i,j) * b(j).
  54   //
  55   func @kernel_matvec(%arga: tensor<?x?xi32, #SparseMatrix>,
  56                       %argb: tensor<?xi32>,
  57                       %argx: tensor<?xi32> {linalg.inplaceable = true})
  58                       -> tensor<?xi32> {
  59     %result = linalg.generic #matvec
  60       ins(%arga, %argb: tensor<?x?xi32, #SparseMatrix>, tensor<?xi32>)
  61       outs(%argx: tensor<?xi32>) {
  62       ^bb(%aval: i32, %bval: i32, %acc: i32):
  63         %prod = arith.muli %aval, %bval : i32
  64         %sum = arith.addi %acc, %prod : i32
  65         linalg.yield %sum : i32
  66     } -> tensor<?xi32>
  67     return %result : tensor<?xi32>
  68   }
  69   // Declaration only; the definition is expected to come from the loaded runner library.
  70   func private @getTensorFilename(index) -> (!Filename)
  71
  72   //
  73   // Driver: builds the sparse matrix from a file, runs the kernel, prints x.
  74   //
  75   func @entry() {
  76     %c0 = arith.constant 0 : index
  77     %c1 = arith.constant 1 : index
  78     %xlen = arith.constant 4 : index
  79     %blen = arith.constant 256 : index
  80     %zero = arith.constant 0 : i32
  81
  82     // Look up the file name for tensor 0 and materialize it as a sparse tensor.
  83     %path = call @getTensorFilename(%c0) : (index) -> (!Filename)
  84     %mat = sparse_tensor.new %path : !Filename to tensor<?x?xi32, #SparseMatrix>
  85
  86     // Set up the dense vectors: x (4 entries) is zeroed, b (256 entries) holds 1..256.
  87     %xbuf = memref.alloc(%xlen) : memref<?xi32>
  88     %bbuf = memref.alloc(%blen) : memref<?xi32>
  89     scf.for %p = %c0 to %xlen step %c1 {
  90       memref.store %zero, %xbuf[%p] : memref<?xi32>
  91     }
  92     scf.for %p = %c0 to %blen step %c1 {
  93       %next = arith.addi %p, %c1 : index
  94       %val = arith.index_cast %next : index to i32
  95       memref.store %val, %bbuf[%p] : memref<?xi32>
  96     }
  97     %xvec = bufferization.to_tensor %xbuf : memref<?xi32>
  98     %bvec = bufferization.to_tensor %bbuf : memref<?xi32>
  99
 100     // Run the sparse kernel on the prepared operands.
 101     %xout = call @kernel_matvec(%mat, %bvec, %xvec)
 102       : (tensor<?x?xi32, #SparseMatrix>, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
 103
 104     // Read back the four result values as one vector and print them.
 105     //
 106     // CHECK: ( 889, 1514, -21, -3431 )
 107     //
 108     %outbuf = bufferization.to_memref %xout : memref<?xi32>
 109     %vec = vector.transfer_read %outbuf[%c0], %zero : memref<?xi32>, vector<4xi32>
 110     vector.print %vec : vector<4xi32>
 111
 112     // Free the dense buffers and the sparse tensor storage.
 113     memref.dealloc %bbuf : memref<?xi32>
 114     memref.dealloc %xbuf : memref<?xi32>
 115     sparse_tensor.release %mat : tensor<?x?xi32, #SparseMatrix>
 116
 117     return
 118   }
 119 }