# RUN: SUPPORT_LIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
# RUN:   %PYTHON %s | FileCheck %s
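
# The RUN lines above are read by lit: they execute this script with the
# runner utils shared library in SUPPORT_LIB and pipe the output into
# FileCheck, which verifies the CHECK lines below.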

import ctypes
import errno
import numpy as np
import os

import mlir.all_passes_registration

from mlir import ir
from mlir import runtime as rt
from mlir import execution_engine
from mlir import passmanager

from mlir.dialects import sparse_tensor as st
from mlir.dialects import builtin
from mlir.dialects.linalg.opdsl import lang as dsl
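
# Importing mlir.all_passes_registration registers all upstream passes so that
# the textual pass pipeline in SparseCompiler below can be parsed.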


@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)):
  C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
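
# Note: the OpDSL definition above only records the affine access pattern
# C[m, n] += A[m, k] * B[k, n]; it is the sparsification pass that later uses
# the sparse encoding attached to A's tensor type to generate sparse code.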


def build_SpMM(attr: st.EncodingAttr):
  """Build SpMM kernel.

  This method generates a linalg op for matrix multiplication using just the
  Python API. Effectively, a generic linalg op is constructed that computes
  C(i,j) += A(i,k) * B(k,j) for the annotated matrix A.
  """
  module = ir.Module.create()
  f64 = ir.F64Type.get()
  a = ir.RankedTensorType.get([3, 4], f64, attr)
  b = ir.RankedTensorType.get([4, 2], f64)
  c = ir.RankedTensorType.get([3, 2], f64)
  arguments = [a, b, c]
  with ir.InsertionPoint(module.body):

    @builtin.FuncOp.from_py_func(*arguments)
    def spMxM(*args):
      return matmul_dsl(args[0], args[1], outs=[args[2]])

  return module


def boilerplate(attr: st.EncodingAttr):
  """Returns boilerplate main method.

  This method sets up a boilerplate main method that takes three tensors
  (a, b, c), converts the first tensor a into a sparse tensor, and then calls
  the sparse kernel for matrix multiplication. For convenience, this part is
  purely done as string input.
  """
  return f"""
func @main(%ad: tensor<3x4xf64>, %b: tensor<4x2xf64>, %c: tensor<3x2xf64>) -> tensor<3x2xf64>
  attributes {{ llvm.emit_c_interface }} {{
  %a = sparse_tensor.convert %ad : tensor<3x4xf64> to tensor<3x4xf64, {attr}>
  %0 = call @spMxM(%a, %b, %c) : (tensor<3x4xf64, {attr}>,
                                  tensor<4x2xf64>,
                                  tensor<3x2xf64>) -> tensor<3x2xf64>
  return %0 : tensor<3x2xf64>
}}
"""
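
# In the template above, the doubled braces escape literal `{` and `}` in the
# Python f-string, while `{attr}` splices the textual sparse encoding
# attribute directly into the tensor types.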


def build_compile_and_run_SpMM(attr: st.EncodingAttr, support_lib: str,
                               compiler):
  # Build.
  module = build_SpMM(attr)
  func = str(module.operation.regions[0].blocks[0].operations[0].operation)
  module = ir.Module.parse(func + boilerplate(attr))

  # Compile.
  compiler(module)
  engine = execution_engine.ExecutionEngine(
      module, opt_level=0, shared_libs=[support_lib])
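  # The ExecutionEngine JIT-compiles the lowered module; shared_libs points at
  # the runner utils library so that the sparse tensor runtime routines (used
  # by sparse_tensor.convert and friends) can be resolved at load time.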

  # Set up numpy input and buffer for output.
  a = np.array(
      [[1.1, 0.0, 0.0, 1.4], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.3, 0.0]],
      np.float64)
  b = np.array([[1.0, 2.0], [4.0, 3.0], [5.0, 6.0], [8.0, 7.0]], np.float64)
  c = np.zeros((3, 2), np.float64)

  mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
  mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
  mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
  # Allocate a MemRefDescriptor to receive the output tensor.
  # The buffer itself is allocated inside the MLIR code generation.
  ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
  mem_out = ctypes.pointer(ctypes.pointer(ref_out))
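  # With llvm.emit_c_interface each memref argument is passed as a pointer to
  # its descriptor, and ExecutionEngine.invoke expects a pointer to each
  # packed argument, hence the double ctypes.pointer wrapping above.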

  # Invoke the kernel and get numpy output.
  # Built-in bufferization uses in-out buffers.
  # TODO: replace with inplace comprehensive bufferization.
  engine.invoke('main', mem_out, mem_a, mem_b, mem_c)

  # Sanity check on computed result.
  expected = np.matmul(a, b)
  c = rt.ranked_memref_to_numpy(mem_out[0])
  if np.allclose(c, expected):
    pass
  else:
    quit('FAILURE')


class SparseCompiler:
  """Sparse compiler passes."""

  def __init__(self, options: str):
    pipeline = (
        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
        f'sparsification{{{options}}},'
        f'sparse-tensor-conversion,'
        f'builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),'
        f'convert-scf-to-std,'
        f'func-bufferize,'
        f'tensor-constant-bufferize,'
        f'builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize),'
        f'convert-vector-to-llvm{{reassociate-fp-reductions=1 enable-index-optimizations=1}},'
        f'lower-affine,'
        f'convert-memref-to-llvm,'
        f'convert-std-to-llvm,'
        f'reconcile-unrealized-casts')
    self.pipeline = pipeline

  def __call__(self, module: ir.Module):
    passmanager.PassManager.parse(self.pipeline).run(module)


def main():
  support_lib = os.getenv('SUPPORT_LIB')
  assert support_lib is not None, 'SUPPORT_LIB is undefined'
  if not os.path.exists(support_lib):
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                            support_lib)

  # CHECK-LABEL: TEST: testSpMM
  print('\nTEST: testSpMM')
  with ir.Context() as ctx, ir.Location.unknown():
    count = 0
    # Loop over various ways to compile and annotate the SpMM kernel with
    # a *single* sparse tensor. Note that we deliberately do not exhaustively
    # search the full state space to reduce runtime of the test. It is
    # straightforward to adapt the code below to explore more combinations.
    # Keep the strategy options fixed; only the storage annotation varies.
    par = 0
    vec = 0
    vl = 1
    e = False
    opt = (f'parallelization-strategy={par} '
           f'vectorization-strategy={vec} '
           f'vl={vl} enable-simd-index32={e}')
    levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
              [st.DimLevelType.dense, st.DimLevelType.compressed],
              [st.DimLevelType.compressed, st.DimLevelType.dense],
              [st.DimLevelType.compressed, st.DimLevelType.compressed]]
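    # Each entry selects dense or compressed storage per dimension of A; for
    # example, [dense, compressed] with the identity ordering is CSR.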
    orderings = [
        ir.AffineMap.get_permutation([0, 1]),
        ir.AffineMap.get_permutation([1, 0])
    ]
    bitwidths = [0]
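    # 4 level combinations x 2 dimension orderings x 1 pointer bitwidth x
    # 1 index bitwidth = 8 configurations, matching the CHECK count below.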
    for level in levels:
      for ordering in orderings:
        for pwidth in bitwidths:
          for iwidth in bitwidths:
            attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
            compiler = SparseCompiler(options=opt)
            build_compile_and_run_SpMM(attr, support_lib, compiler)
            count = count + 1

  # CHECK: Passed 8 tests
  print('Passed ', count, 'tests')


if __name__ == '__main__':
  main()