1 // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
3 module attributes {gpu.container_module} {
// Test case: sparse matrix-vector product (SpMV) on a COO matrix.
// The async chain in @matvec must lower, in program order, to the mgpu*
// runtime calls listed in the FileCheck directives below.
// CHECK-LABEL: func @matvec
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCoo
// CHECK: llvm.call @mgpuCreateDnVec
// CHECK: llvm.call @mgpuSpMVBufferSize
// CHECK: llvm.call @mgpuSpMV
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnVec
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @matvec(%arg0: index) {
  // Open the async chain; each following op depends on the previous token.
  %token0 = gpu.wait async
  // One index buffer (reused for both coordinate arrays) and one value buffer.
  %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
  %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
  // Sparse operand in COO form and a rank-1 dense tensor (lowers to DnVec).
  %spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
  %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
  // Buffer-size query followed by the actual SpMV; %mem2 doubles as workspace.
  %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
  %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
  // Release the sparse and dense handles.
  %token8 = gpu.destroy_sp_mat async [%token7] %spmat
  %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
  // Blocking wait on the final token closes the chain; the trailing
  // StreamSynchronize / StreamDestroy expectations correspond to this op.
  gpu.wait [%token9]
  return
}
// Test case: sparse-dense matrix product (SpMM) on a CSR matrix.
// The async chain in @matmul must lower, in program order, to the mgpu*
// runtime calls listed in the FileCheck directives below.
// CHECK-LABEL: func @matmul
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateDnMat
// CHECK: llvm.call @mgpuSpMMBufferSize
// CHECK: llvm.call @mgpuSpMM
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @matmul(%arg0: index) {
  // Open the async chain; each following op depends on the previous token.
  %token0 = gpu.wait async
  // One index buffer (reused for positions and coordinates) and one value buffer.
  %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
  %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
  // Sparse operand in CSR form and a rank-2 dense tensor (lowers to DnMat).
  %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
  %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
  // Buffer-size query followed by the actual SpMM; %mem2 doubles as workspace.
  %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index into f64
  %token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
  // Release the sparse and dense handles.
  %token8 = gpu.destroy_sp_mat async [%token7] %spmat
  %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
  // Blocking wait on the final token closes the chain; the trailing
  // StreamSynchronize / StreamDestroy expectations correspond to this op.
  gpu.wait [%token9]
  return
}
// Test case: sparse-sparse matrix product (SpGEMM) on three CSR matrices.
// The async chain in @spgemm must lower, in program order, to the mgpu*
// runtime calls listed in the FileCheck directives below. The same op,
// gpu.spgemm_work_estimation_or_compute, is used twice: once with kind
// WORK_ESTIMATION and once with kind COMPUTE.
// CHECK-LABEL: func @spgemm
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuSpGEMMCreateDescr
// CHECK: llvm.call @mgpuSpGEMMWorkEstimation
// CHECK: llvm.call @mgpuSpGEMMCompute
// CHECK: llvm.call @mgpuSpMatGetSize
// CHECK: llvm.call @mgpuSetCsrPointers
// CHECK: llvm.call @mgpuSpGEMMCopy
// CHECK: llvm.call @mgpuSpGEMMDestroyDescr
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @spgemm(%arg0: index) {
  // Open the async chain; each following op depends on the previous token.
  %token0 = gpu.wait async
  // One index buffer and one f32 value buffer, shared by all three matrices.
  %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
  %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
  // Operands A and B and the result C, all in CSR form.
  %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
  // Zero-sized host buffer and a zero size passed as the workspace arguments.
  %alloc = memref.alloc() : memref<0xi8> // nullptr
  %c0 = arith.constant 0 : index
  // Phase 1: work estimation.
  %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
      [%token6]{WORK_ESTIMATION}
      %spmatA, %spmatB, %spmatC,
      %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
  // Phase 2: compute. It must depend on %token7 and carry the COMPUTE kind,
  // otherwise the SpGEMMCompute expectation above cannot be satisfied.
  %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
      [%token7]{COMPUTE}
      %spmatA, %spmatB, %spmatC,
      %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
  // Query the size of C, then (chained on that query) install its buffers.
  %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
  %token10 = gpu.set_csr_pointers async [%token9] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
  // Copy the product into C, then tear everything down.
  %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
  %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
  %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
  %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
  %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
  // Blocking wait on the final token closes the chain; the trailing
  // StreamSynchronize / StreamDestroy expectations correspond to this op.
  gpu.wait [%token15]
  return
}
// Test case: sampled dense-dense matrix product (SDDMM) into a CSR matrix.
// The async chain in @sddmm must lower, in program order, to the mgpu*
// runtime calls listed in the FileCheck directives below.
// CHECK-LABEL: func @sddmm
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateDnMat
// CHECK: llvm.call @mgpuSDDMMBufferSize
// CHECK: llvm.call @mgpuSDDMM
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @sddmm(%arg0: index) {
  // Open the async chain; each following op depends on the previous token.
  %token0 = gpu.wait async
  // One index buffer (reused for positions and coordinates) and one value buffer.
  %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
  %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
  // Sparse operand in CSR form and a rank-2 dense tensor (lowers to DnMat).
  %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
  %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
  // Operand order here is dense, dense, sparse (unlike spmv/spmm).
  %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %dnmat, %dnmat, %spmat into f64
  %token7 = gpu.sddmm async [%token6] %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
  // Release the sparse and dense handles.
  %token8 = gpu.destroy_sp_mat async [%token7] %spmat
  %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
  // Blocking wait on the final token closes the chain; the trailing
  // StreamSynchronize / StreamDestroy expectations correspond to this op.
  gpu.wait [%token9]
  return
}
131 // CHECK-LABEL: func @csc_and_bsr
132 // CHECK: llvm.call @mgpuStreamCreate
133 // CHECK: llvm.call @mgpuMemAlloc
134 // CHECK: llvm.call @mgpuMemAlloc
135 // CHECK: llvm.call @mgpuCreateCsc
136 // CHECK: llvm.call @mgpuCreateBsr
137 // CHECK: llvm.call @mgpuDestroySpMat
138 // CHECK: llvm.call @mgpuDestroySpMat
139 // CHECK: llvm.call @mgpuStreamSynchronize
140 // CHECK: llvm.call @mgpuStreamDestroy
141 func.func @csc_and_bsr(%arg0: index) {
142 %token0 = gpu.wait async
143 %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
144 %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
145 %csc, %token3 = gpu.create_csc async [%token2]
146 %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
147 : memref<?xindex>, memref<?xindex>, memref<?xf64>
148 %bsr, %token4 = gpu.create_bsr async [%token3]
149 %arg0, %arg0, %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
150 : memref<?xindex>, memref<?xindex>, memref<?xf64>
151 %token5 = gpu.destroy_sp_mat async [%token4] %csc
152 %token6 = gpu.destroy_sp_mat async [%token5] %bsr