# RUN: SUPPORT_LIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
# RUN:   %PYTHON %s | FileCheck %s
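
# The RUN lines above are read by lit: they execute this script with the
# runner utils shared library in SUPPORT_LIB and pipe the output into
# FileCheck, which verifies the CHECK lines below.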

import ctypes
import errno
import numpy as np
import os

import mlir.all_passes_registration

from mlir import ir
from mlir import runtime as rt
from mlir import execution_engine
from mlir import passmanager

from mlir.dialects import sparse_tensor as st
from mlir.dialects import builtin
from mlir.dialects.linalg.opdsl import lang as dsl
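
# Importing mlir.all_passes_registration registers all upstream passes so that
# the textual pass pipeline in SparseCompiler below can be parsed.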


@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)):
  C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
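
# Note: the OpDSL definition above only records the affine access pattern
# C[m, n] += A[m, k] * B[k, n]; it is the sparsification pass that later uses
# the sparse encoding attached to A's tensor type to generate sparse code.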


def build_SpMM(attr: st.EncodingAttr):
  """Build SpMM kernel.

  This method generates a linalg op for matrix multiplication using just the
  Python API. Effectively, a generic linalg op is constructed that computes
  C(i,j) += A(i,k) * B(k,j) for the annotated matrix A.
  """
  module = ir.Module.create()
  f64 = ir.F64Type.get()
  a = ir.RankedTensorType.get([3, 4], f64, attr)
  b = ir.RankedTensorType.get([4, 2], f64)
  c = ir.RankedTensorType.get([3, 2], f64)
  arguments = [a, b, c]
  with ir.InsertionPoint(module.body):

    @builtin.FuncOp.from_py_func(*arguments)
    def spMxM(*args):
      return matmul_dsl(args[0], args[1], outs=[args[2]])

  return module


def boilerplate(attr: st.EncodingAttr):
  """Returns boilerplate main method.

  This method sets up a boilerplate main method that takes three tensors
  (a, b, c), converts the first tensor a into a sparse tensor, and then calls
  the sparse kernel for matrix multiplication. For convenience, this part is
  purely done as string input.
  """
  return f"""
func @main(%ad: tensor<3x4xf64>, %b: tensor<4x2xf64>, %c: tensor<3x2xf64>) -> tensor<3x2xf64>
  attributes {{ llvm.emit_c_interface }} {{
  %a = sparse_tensor.convert %ad : tensor<3x4xf64> to tensor<3x4xf64, {attr}>
  %0 = call @spMxM(%a, %b, %c) : (tensor<3x4xf64, {attr}>,
                                  tensor<4x2xf64>,
                                  tensor<3x2xf64>) -> tensor<3x2xf64>
  return %0 : tensor<3x2xf64>
}}
"""
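
# In the template above, the doubled braces escape literal `{` and `}` in the
# Python f-string, while `{attr}` splices the textual sparse encoding
# attribute directly into the tensor types.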


def build_compile_and_run_SpMM(attr: st.EncodingAttr, support_lib: str,
                               compiler):
  # Build.
  module = build_SpMM(attr)
  func = str(module.operation.regions[0].blocks[0].operations[0].operation)
  module = ir.Module.parse(func + boilerplate(attr))

  # Compile.
  compiler(module)
  engine = execution_engine.ExecutionEngine(
      module, opt_level=0, shared_libs=[support_lib])
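  # The ExecutionEngine JIT-compiles the lowered module; shared_libs points at
  # the runner utils library so that the sparse tensor runtime routines (used
  # by sparse_tensor.convert and friends) can be resolved at load time.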

  # Set up numpy input and buffer for output.
  a = np.array(
      [[1.1, 0.0, 0.0, 1.4], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.3, 0.0]],
      np.float64)
  b = np.array([[1.0, 2.0], [4.0, 3.0], [5.0, 6.0], [8.0, 7.0]], np.float64)
  c = np.zeros((3, 2), np.float64)

  mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
  mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
  mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
  # Allocate a MemRefDescriptor to receive the output tensor.
  # The buffer itself is allocated inside the MLIR code generation.
  ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
  mem_out = ctypes.pointer(ctypes.pointer(ref_out))
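  # With llvm.emit_c_interface each memref argument is passed as a pointer to
  # its descriptor, and ExecutionEngine.invoke expects a pointer to each
  # packed argument, hence the double ctypes.pointer wrapping above.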

  # Invoke the kernel and get numpy output.
  # Built-in bufferization uses in-out buffers.
  # TODO: replace with inplace comprehensive bufferization.
  engine.invoke('main', mem_out, mem_a, mem_b, mem_c)

  # Sanity check on computed result.
  expected = np.matmul(a, b)
  c = rt.ranked_memref_to_numpy(mem_out[0])
  if np.allclose(c, expected):
    pass
  else:
    quit('FAILURE')


class SparseCompiler:
  """Sparse compiler passes."""

  def __init__(self, options: str):
    pipeline = (
        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
        f'sparsification{{{options}}},'
        f'sparse-tensor-conversion,'
        f'builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),'
        f'convert-scf-to-std,'
        f'func-bufferize,'
        f'tensor-constant-bufferize,'
        f'builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize),'
        f'convert-vector-to-llvm{{reassociate-fp-reductions=1 enable-index-optimizations=1}},'
        f'lower-affine,'
        f'convert-memref-to-llvm,'
        f'convert-std-to-llvm,'
        f'reconcile-unrealized-casts')
    self.pipeline = pipeline

  def __call__(self, module: ir.Module):
    passmanager.PassManager.parse(self.pipeline).run(module)


def main():
  support_lib = os.getenv('SUPPORT_LIB')
  assert support_lib is not None, 'SUPPORT_LIB is undefined'
  if not os.path.exists(support_lib):
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                            support_lib)

  # CHECK-LABEL: TEST: testSpMM
  print('\nTEST: testSpMM')
  with ir.Context() as ctx, ir.Location.unknown():
    count = 0
    # Loop over various ways to compile and annotate the SpMM kernel with
    # a *single* sparse tensor. Note that we deliberately do not exhaustively
    # search the full state space to reduce runtime of the test. It is
    # straightforward to adapt the code below to explore more combinations.
    # Keep the strategy options fixed; only the storage annotation varies.
    par = 0
    vec = 0
    vl = 1
    e = False
    opt = (f'parallelization-strategy={par} '
           f'vectorization-strategy={vec} '
           f'vl={vl} enable-simd-index32={e}')
    levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
              [st.DimLevelType.dense, st.DimLevelType.compressed],
              [st.DimLevelType.compressed, st.DimLevelType.dense],
              [st.DimLevelType.compressed, st.DimLevelType.compressed]]
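    # Each entry selects dense or compressed storage per dimension of A; for
    # example, [dense, compressed] with the identity ordering is CSR.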
    orderings = [
        ir.AffineMap.get_permutation([0, 1]),
        ir.AffineMap.get_permutation([1, 0])
    ]
    bitwidths = [0]
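    # 4 level combinations x 2 dimension orderings x 1 pointer bitwidth x
    # 1 index bitwidth = 8 configurations, matching the CHECK count below.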
    for level in levels:
      for ordering in orderings:
        for pwidth in bitwidths:
          for iwidth in bitwidths:
            attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
            compiler = SparseCompiler(options=opt)
            build_compile_and_run_SpMM(attr, support_lib, compiler)
            count = count + 1

  # CHECK: Passed 8 tests
  print('Passed ', count, 'tests')


if __name__ == '__main__':
  main()