mlir/benchmark/python/benchmark_sparse.py

   1 """This file contains benchmarks for sparse tensors. In particular, it
   2 contains benchmarks for both mlir sparse tensor dialect and numpy so that they
   3 can be compared against each other.
   4 """
   5 import ctypes
   6 import numpy as np
   7 import os
   8 import re
   9 import time
  10
  11 from mlir import ir
  12 from mlir import runtime as rt
  13 from mlir.dialects import func
  14 from mlir.dialects.linalg.opdsl import lang as dsl
  15 from mlir.execution_engine import ExecutionEngine
  16
  17 from common import create_sparse_np_tensor
  18 from common import emit_timer_func
  19 from common import emit_benchmark_wrapped_main_func
  20 from common import get_kernel_func_from_module
  21 from common import setup_passes
  22
  23
# Declares the structured op used by the MLIR benchmark kernel in this file.
# The operands are described with symbolic sizes: A is M x K, B is K x N and
# C is M x N, with C marked as the output operand.
@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True),
):
    """Helper function for mlir sparse matrix multiplication benchmark."""
    # C[m, n] accumulates the products A[m, k] * B[k, n]; k is the index
    # shared by A and B and does not appear on the output.
    C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
  32
  33
  34 def benchmark_sparse_mlir_multiplication():
  35     """Benchmark for mlir sparse matrix multiplication. Because its an
  36     MLIR benchmark we need to return both a `compiler` function and a `runner`
  37     function.
  38     """
  39     with ir.Context(), ir.Location.unknown():
  40         module = ir.Module.create()
  41         f64 = ir.F64Type.get()
  42         param1_type = ir.RankedTensorType.get([1000, 1500], f64)
  43         param2_type = ir.RankedTensorType.get([1500, 2000], f64)
  44         result_type = ir.RankedTensorType.get([1000, 2000], f64)
  45         with ir.InsertionPoint(module.body):
  46
  47             @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
  48             def sparse_kernel(x, y, z):
  49                 return matmul_dsl(x, y, outs=[z])
  50
  51     def compiler():
  52         with ir.Context(), ir.Location.unknown():
  53             kernel_func = get_kernel_func_from_module(module)
  54             timer_func = emit_timer_func()
  55             wrapped_func = emit_benchmark_wrapped_main_func(kernel_func, timer_func)
  56             main_module_with_benchmark = ir.Module.parse(
  57                 str(timer_func) + str(wrapped_func) + str(kernel_func)
  58             )
  59             setup_passes(main_module_with_benchmark)
  60             c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
  61             assert os.path.exists(c_runner_utils), (
  62                 f"{c_runner_utils} does not exist."
  63                 f" Please pass a valid value for"
  64                 f" MLIR_C_RUNNER_UTILS environment variable."
  65             )
  66             runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
  67             assert os.path.exists(runner_utils), (
  68                 f"{runner_utils} does not exist."
  69                 f" Please pass a valid value for MLIR_RUNNER_UTILS"
  70                 f" environment variable."
  71             )
  72
  73             engine = ExecutionEngine(
  74                 main_module_with_benchmark,
  75                 3,
  76                 shared_libs=[c_runner_utils, runner_utils],
  77             )
  78             return engine.invoke
  79
  80     def runner(engine_invoke):
  81         compiled_program_args = []
  82         for argument_type in [result_type, param1_type, param2_type, result_type]:
  83             argument_type_str = str(argument_type)
  84             dimensions_str = re.sub("<|>|tensor", "", argument_type_str)
  85             dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
  86             if argument_type == result_type:
  87                 argument = np.zeros(dimensions, np.float64)
  88             else:
  89                 argument = create_sparse_np_tensor(dimensions, 1000)
  90             compiled_program_args.append(
  91                 ctypes.pointer(
  92                     ctypes.pointer(rt.get_ranked_memref_descriptor(argument))
  93                 )
  94             )
  95         np_timers_ns = np.array([0], dtype=np.int64)
  96         compiled_program_args.append(
  97             ctypes.pointer(
  98                 ctypes.pointer(rt.get_ranked_memref_descriptor(np_timers_ns))
  99             )
 100         )
 101         engine_invoke("main", *compiled_program_args)
 102         return int(np_timers_ns[0])
 103
 104     return compiler, runner
 105
 106
 107 def benchmark_np_matrix_multiplication():
 108     """Benchmark for numpy matrix multiplication. Because its a python
 109     benchmark, we don't have any `compiler` function returned. We just return
 110     the `runner` function.
 111     """
 112
 113     def runner():
 114         argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))
 115         argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))
 116         start_time = time.time_ns()
 117         np.matmul(argument1, argument2)
 118         return time.time_ns() - start_time
 119
 120     return None, runner