1 /**********************************************************************\
2 ______ __ __ _______ _______ __ __ __ __ __ ___
3 / || | | | | ____|| ____|| | | | | \ | | | |/ /
4 | ,----'| | | | | |__ | |__ | | | | | \| | | ' /
5 | | | | | | | __| | __| | | | | | . ` | | <
6 | `----.| `--' | | | | | | `----.| | | |\ | | . \
7 \______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
11 cufflink is a library for linking numerical methods based on Nvidia's
12 Compute Unified Device Architecture (CUDA™) C/C++ programming language
15 Please note that cufflink is not approved or endorsed by OpenCFD®
16 Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
17 producer of OpenFOAM® software.
19 The official web-site of OpenCFD® Limited is www.openfoam.com .
21 ------------------------------------------------------------------------
22 This file is part of cufflink.
24 cufflink is free software: you can redistribute it and/or modify
25 it under the terms of the GNU General Public License as published by
26 the Free Software Foundation, either version 3 of the License, or
27 (at your option) any later version.
29 cufflink is distributed in the hope that it will be useful,
30 but WITHOUT ANY WARRANTY; without even the implied warranty of
31 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 GNU General Public License for more details.
34 You should have received a copy of the GNU General Public License
35 along with cufflink. If not, see <http://www.gnu.org/licenses/>.
38 Daniel P. Combest. All rights reserved.
39 Modifications by Dominik Christ, Wikki Ltd.
42 diagonal preconditioned conjugate gradient
43 solver for symmetric Matrices using a CUSP CUDA™ based solver.
45 \**********************************************************************/
47 #include "cudaTypes.H"
50 #include <cusp/detail/config.h>
51 #include <cusp/verify.h>
52 #include <cusp/precond/diagonal.h>
53 #include <cusp/coo_matrix.h>
54 #include <cusp/blas.h>
55 #include <cusp/multiply.h>
57 extern "C" void bicgDiag
59 cuspEquationSystem* ces,
60 cudaSolverPerformance* solverPerf
63 cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
64 cusp::array1d<ValueType, MemorySpace> X(ces->X);
65 cusp::array1d<ValueType, MemorySpace> B(ces->B);
67 // Fill in the rest of the diag (rows and col)
68 // Determine row indices of diagonal values and fill A COO matrix
71 A.row_indices.begin(),
72 A.row_indices.begin() + ces->nCells
75 // Determine column indices of diagonal values and fill A COO matrix
78 A.column_indices.begin(),
79 A.column_indices.begin() + ces->nCells
82 // Sorted coo by row and column. speeds code up a little bit more
83 A.sort_by_row_and_column();
86 //# include "setGPUStorage.H"
88 if (solverPerf->debugCusp)
90 std::cout << " Using Ainv preconditioner\n";
93 cusp::precond::diagonal<ValueType, MemorySpace> M(A);
95 // Start Krylov solver
96 assert(A.num_rows == A.num_cols); // Sanity check
98 const size_t N = A.num_rows;
100 // Allocate workspace
101 cusp::array1d<ValueType,MemorySpace> y(N);
103 cusp::array1d<ValueType,MemorySpace> p(N);
104 cusp::array1d<ValueType,MemorySpace> r(N);
105 cusp::array1d<ValueType,MemorySpace> r_star(N);
106 cusp::array1d<ValueType,MemorySpace> s(N);
107 cusp::array1d<ValueType,MemorySpace> Mp(N);
108 cusp::array1d<ValueType,MemorySpace> AMp(N);
109 cusp::array1d<ValueType,MemorySpace> Ms(N);
110 cusp::array1d<ValueType,MemorySpace> AMs(N);
113 cusp::multiply(A, X, y);
115 // Define the normalization factor
116 ValueType normFactor = 1.0;
118 # include "buildNormFactor.H"
121 cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
124 cusp::blas::copy(r, p);
127 cusp::blas::copy(r, r_star);
129 ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
132 ValueType normR = cusp::blas::nrm2(r)/normFactor;
133 ValueType normR0 = normR; // Initial residual
134 solverPerf->iRes = normR0;
139 normR > (solverPerf->tol)
140 && count < (solverPerf->maxIter)
141 && normR/normR0 >= (solverPerf->relTol)
142 || count < solverPerf->minIter
146 cusp::multiply(M, p, Mp);
149 cusp::multiply(A, Mp, AMp);
151 // alpha = (r_j, r_star) / (A*M*p, r_star)
152 ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
154 // s_j = r_j - alpha * AMp
155 cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
157 ValueType normS = cusp::blas::nrm2(s)/normFactor;
162 normS > (solverPerf->tol)
163 && normS/normR0 >= (solverPerf->relTol)
169 cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
172 cusp::multiply(A, X, y);
175 cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
176 // not sure we should be measuring r here...need to look again.
177 normR = cusp::blas::nrm2(r)/normFactor;
185 cusp::multiply(M, s, Ms);
188 cusp::multiply(A, Ms, AMs);
190 // omega = (AMs, s) / (AMs, AMs)
191 ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
193 // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
194 cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
196 // r_{j+1} = s_j - omega*A*M*s
197 cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
199 // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
200 ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
201 ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
202 r_r_star_old = r_r_star_new;
204 // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
205 cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
207 // not dure we should be measuring r here...need to look again.
208 normR = cusp::blas::nrm2(r)/normFactor;
215 solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
216 solverPerf->nIterations = count;
221 solverPerf->fRes<=solverPerf->tol
222 || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
225 solverPerf->converged = true;
229 solverPerf->converged = false;
232 // Pass the solution vector back