mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir

   1 // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
   2
   3 module attributes {gpu.container_module} {
   4
   5   // CHECK-LABEL: func @matvec
   6   // CHECK: llvm.call @mgpuStreamCreate
   7   // CHECK: llvm.call @mgpuMemAlloc
   8   // CHECK: llvm.call @mgpuMemAlloc
   9   // CHECK: llvm.call @mgpuCreateCoo
  10   // CHECK: llvm.call @mgpuCreateDnVec
  11   // CHECK: llvm.call @mgpuSpMVBufferSize
  12   // CHECK: llvm.call @mgpuSpMV
  13   // CHECK: llvm.call @mgpuDestroySpMat
  14   // CHECK: llvm.call @mgpuDestroyDnVec
  15   // CHECK: llvm.call @mgpuStreamSynchronize
  16   // CHECK: llvm.call @mgpuStreamDestroy
  17   func.func @matvec(%arg0: index) {  // COO sparse matrix and dense vector driven through gpu.spmv on one token chain
  18     %t0 = gpu.wait async  // first token of the chain
  19     %idx, %t1 = gpu.alloc async [%t0] (%arg0) : memref<?xindex>
  20     %vals, %t2 = gpu.alloc async [%t1] (%arg0) : memref<?xf64>
  21     %coo, %t3 = gpu.create_coo async [%t2] %arg0, %arg0, %arg0, %idx, %idx, %vals : memref<?xindex>, memref<?xindex>, memref<?xf64>
  22     %vec, %t4 = gpu.create_dn_tensor async [%t3] %vals, %arg0 : index into memref<?xf64>
  23     %bufSz, %t5 = gpu.spmv_buffer_size async [%t4] %coo, %vec, %vec into f64  // %bufSz is not used afterwards
  24     %t6 = gpu.spmv async [%t5] %coo, %vec, %vec, %vals : memref<?xf64> into f64  // %vec serves as both dense operands
  25     %t7 = gpu.destroy_sp_mat async [%t6] %coo
  26     %t8 = gpu.destroy_dn_tensor async [%t7] %vec
  27     gpu.wait [%t8]  // blocking wait on the last token
  28     return
  29   }
  30
  31   // CHECK-LABEL: func @matmul
  32   // CHECK: llvm.call @mgpuStreamCreate
  33   // CHECK: llvm.call @mgpuMemAlloc
  34   // CHECK: llvm.call @mgpuMemAlloc
  35   // CHECK: llvm.call @mgpuCreateCsr
  36   // CHECK: llvm.call @mgpuCreateDnMat
  37   // CHECK: llvm.call @mgpuSpMMBufferSize
  38   // CHECK: llvm.call @mgpuSpMM
  39   // CHECK: llvm.call @mgpuDestroySpMat
  40   // CHECK: llvm.call @mgpuDestroyDnMat
  41   // CHECK: llvm.call @mgpuStreamSynchronize
  42   // CHECK: llvm.call @mgpuStreamDestroy
  43   func.func @matmul(%arg0: index) {  // CSR sparse matrix and dense matrix driven through gpu.spmm on one token chain
  44     %t0 = gpu.wait async  // first token of the chain
  45     %idx, %t1 = gpu.alloc async [%t0] (%arg0) : memref<?xindex>
  46     %vals, %t2 = gpu.alloc async [%t1] (%arg0) : memref<?xf64>
  47     %csr, %t3 = gpu.create_csr async [%t2] %arg0, %arg0, %arg0, %idx, %idx, %vals : memref<?xindex>, memref<?xindex>, memref<?xf64>
  48     %mat, %t4 = gpu.create_dn_tensor async [%t3] %vals, %arg0, %arg0 : index, index into memref<?xf64>
  49     %bufSz, %t5 = gpu.spmm_buffer_size async [%t4] %csr, %mat, %mat : index into f64  // %bufSz is not used afterwards
  50     %t6 = gpu.spmm async [%t5] %csr, %mat, %mat, %vals : memref<?xf64> into f64  // %mat serves as both dense operands
  51     %t7 = gpu.destroy_sp_mat async [%t6] %csr
  52     %t8 = gpu.destroy_dn_tensor async [%t7] %mat
  53     gpu.wait [%t8]  // blocking wait on the last token
  54     return
  55   }
  56
  57   // CHECK-LABEL: func @spgemm
  58   // CHECK: llvm.call @mgpuStreamCreate
  59   // CHECK: llvm.call @mgpuMemAlloc
  60   // CHECK: llvm.call @mgpuMemAlloc
  61   // CHECK: llvm.call @mgpuCreateCsr
  62   // CHECK: llvm.call @mgpuCreateCsr
  63   // CHECK: llvm.call @mgpuCreateCsr
  64   // CHECK: llvm.call @mgpuSpGEMMCreateDescr
  65   // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
  66   // CHECK: llvm.call @mgpuSpGEMMCompute
  67   // CHECK: llvm.call @mgpuSpMatGetSize
  68   // CHECK: llvm.call @mgpuSetCsrPointers
  69   // CHECK: llvm.call @mgpuSpGEMMCopy
  70   // CHECK: llvm.call @mgpuSpGEMMDestroyDescr
  71   // CHECK: llvm.call @mgpuDestroySpMat
  72   // CHECK: llvm.call @mgpuDestroySpMat
  73   // CHECK: llvm.call @mgpuDestroySpMat
  74   // CHECK: llvm.call @mgpuStreamSynchronize
  75   // CHECK: llvm.call @mgpuStreamDestroy
  76   func.func @spgemm(%arg0: index) {  // three CSR matrices driven through the SpGEMM descriptor ops on one token chain
  77     %token0 = gpu.wait async  // first token of the chain
  78     %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
  79     %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
  80     %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  81     %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  82     %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  83     %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
  84     %alloc = memref.alloc() : memref<0xi8>  // nullptr
  85     %c0 = arith.constant 0 : index
  86     %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async  // WORK_ESTIMATION variant; %bufferSz1 is not used afterwards
  87                             [%token6]{WORK_ESTIMATION}
  88                             %spmatA, %spmatB, %spmatC,
  89                             %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
  90     %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async  // COMPUTE variant; %bufferSz2 is not used afterwards
  91                                [%token7]{COMPUTE}
  92                                %spmatA, %spmatB, %spmatC,
  93                                %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
  94     %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
  95     %token10 = gpu.set_csr_pointers async [%token9] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>  // depends on %token9 so the size query is on the awaited chain
  96     %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
  97     %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
  98     %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
  99     %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
 100     %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
 101     gpu.wait [%token15]  // blocking wait on the last token
 102     return
 103   }
 104
 105   // CHECK-LABEL: func @sddmm
 106   // CHECK: llvm.call @mgpuStreamCreate
 107   // CHECK: llvm.call @mgpuMemAlloc
 108   // CHECK: llvm.call @mgpuMemAlloc
 109   // CHECK: llvm.call @mgpuCreateCsr
 110   // CHECK: llvm.call @mgpuCreateDnMat
 111   // CHECK: llvm.call @mgpuSDDMMBufferSize
 112   // CHECK: llvm.call @mgpuSDDMM
 113   // CHECK: llvm.call @mgpuDestroySpMat
 114   // CHECK: llvm.call @mgpuDestroyDnMat
 115   // CHECK: llvm.call @mgpuStreamSynchronize
 116   // CHECK: llvm.call @mgpuStreamDestroy
 117   func.func @sddmm(%arg0: index) {  // dense matrix operands and a CSR matrix driven through gpu.sddmm on one token chain
 118     %t0 = gpu.wait async  // first token of the chain
 119     %idx, %t1 = gpu.alloc async [%t0] (%arg0) : memref<?xindex>
 120     %vals, %t2 = gpu.alloc async [%t1] (%arg0) : memref<?xf64>
 121     %csr, %t3 = gpu.create_csr async [%t2] %arg0, %arg0, %arg0, %idx, %idx, %vals : memref<?xindex>, memref<?xindex>, memref<?xf64>
 122     %mat, %t4 = gpu.create_dn_tensor async [%t3] %vals, %arg0, %arg0 : index, index into memref<?xf64>
 123     %bufSz, %t5 = gpu.sddmm_buffer_size async [%t4] %mat, %mat, %csr into f64  // %bufSz is not used afterwards
 124     %t6 = gpu.sddmm async [%t5] %mat, %mat, %csr, %vals : memref<?xf64> into f64  // %mat serves as both dense operands
 125     %t7 = gpu.destroy_sp_mat async [%t6] %csr
 126     %t8 = gpu.destroy_dn_tensor async [%t7] %mat
 127     gpu.wait [%t8]  // blocking wait on the last token
 128     return
 129   }
 130
 131   // CHECK-LABEL: func @csc_and_bsr
 132   // CHECK: llvm.call @mgpuStreamCreate
 133   // CHECK: llvm.call @mgpuMemAlloc
 134   // CHECK: llvm.call @mgpuMemAlloc
 135   // CHECK: llvm.call @mgpuCreateCsc
 136   // CHECK: llvm.call @mgpuCreateBsr
 137   // CHECK: llvm.call @mgpuDestroySpMat
 138   // CHECK: llvm.call @mgpuDestroySpMat
 139   // CHECK: llvm.call @mgpuStreamSynchronize
 140   // CHECK: llvm.call @mgpuStreamDestroy
 141   func.func @csc_and_bsr(%arg0: index) {  // creates one CSC and one BSR matrix, then destroys both
 142     %t0 = gpu.wait async  // first token of the chain
 143     %idx, %t1 = gpu.alloc async [%t0] (%arg0) : memref<?xindex>
 144     %vals, %t2 = gpu.alloc async [%t1] (%arg0) : memref<?xf64>
 145     %cscMat, %t3 = gpu.create_csc async [%t2]  // three index operands followed by three buffers
 146       %arg0, %arg0, %arg0, %idx, %idx, %vals
 147       : memref<?xindex>, memref<?xindex>, memref<?xf64>
 148     %bsrMat, %t4 = gpu.create_bsr async [%t3]  // five index operands followed by three buffers
 149       %arg0, %arg0, %arg0, %arg0, %arg0, %idx, %idx, %vals
 150       : memref<?xindex>, memref<?xindex>, memref<?xf64>
 151     %t5 = gpu.destroy_sp_mat async [%t4] %cscMat
 152     %t6 = gpu.destroy_sp_mat async [%t5] %bsrMat
 153     gpu.wait [%t6]  // blocking wait on the last token
 154     return
 155   }
 156 }