1 /**********************************************************************\
2 ______ __ __ _______ _______ __ __ __ __ __ ___
3 / || | | | | ____|| ____|| | | | | \ | | | |/ /
4 | ,----'| | | | | |__ | |__ | | | | | \| | | ' /
5 | | | | | | | __| | __| | | | | | . ` | | <
6 | `----.| `--' | | | | | | `----.| | | |\ | | . \
7 \______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
11 cufflink is a library for linking numerical methods based on Nvidia's
12 Compute Unified Device Architecture (CUDA™) C/C++ programming language
15 Please note that cufflink is not approved or endorsed by OpenCFD®
16 Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
17 producer of OpenFOAM® software.
19 The official web-site of OpenCFD® Limited is www.openfoam.com .
21 ------------------------------------------------------------------------
22 This file is part of cufflink.
24 cufflink is free software: you can redistribute it and/or modify
25 it under the terms of the GNU General Public License as published by
26 the Free Software Foundation, either version 3 of the License, or
27 (at your option) any later version.
29 cufflink is distributed in the hope that it will be useful,
30 but WITHOUT ANY WARRANTY; without even the implied warranty of
31 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 GNU General Public License for more details.
34 You should have received a copy of the GNU General Public License
35 along with cufflink. If not, see <http://www.gnu.org/licenses/>.
38 Daniel P. Combest. All rights reserved.
39 Modifications by Dominik Christ, Wikki Ltd.
42 diagonal preconditioned conjugate gradient
43 solver for symmetric Matrices using a CUSP CUDA™ based solver.
45 \**********************************************************************/
47 #include "cudaTypes.H"
50 #include <cusp/detail/config.h>
51 #include <cusp/verify.h>
52 #include <cusp/precond/ainv.h>
53 #include <cusp/coo_matrix.h>
54 #include <cusp/blas.h>
55 #include <cusp/multiply.h>
57 extern "C" void bicgAinv
59 cuspEquationSystem* ces,
60 cudaSolverPerformance* solverPerf,
61 ValueType drop_tolerance,
67 cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
68 cusp::array1d<ValueType, MemorySpace> X(ces->X);
69 cusp::array1d<ValueType, MemorySpace> B(ces->B);
71 // Fill in the rest of the diag (rows and col)
72 // Determine row indices of diagonal values and fill A COO matrix
75 A.row_indices.begin(),
76 A.row_indices.begin() + ces->nCells
79 // Determine column indices of diagonal values and fill A COO matrix
82 A.column_indices.begin(),
83 A.column_indices.begin() + ces->nCells
86 // Sorted coo by row and column. speeds code up a little bit more
87 A.sort_by_row_and_column();
90 // #include "cufflink/setGPUStorage.H"
92 if (solverPerf->debugCusp)
94 std::cout << " Using Ainv preconditioner\n";
97 cusp::precond::bridson_ainv<ValueType, MemorySpace> M
106 // Start Krylov solver
107 assert(A.num_rows == A.num_cols); // Sanity check
109 const size_t N = A.num_rows;
111 // Allocate workspace
112 cusp::array1d<ValueType,MemorySpace> y(N);
114 cusp::array1d<ValueType,MemorySpace> p(N);
115 cusp::array1d<ValueType,MemorySpace> r(N);
116 cusp::array1d<ValueType,MemorySpace> r_star(N);
117 cusp::array1d<ValueType,MemorySpace> s(N);
118 cusp::array1d<ValueType,MemorySpace> Mp(N);
119 cusp::array1d<ValueType,MemorySpace> AMp(N);
120 cusp::array1d<ValueType,MemorySpace> Ms(N);
121 cusp::array1d<ValueType,MemorySpace> AMs(N);
124 cusp::multiply(A, X, y);
126 // Define the normalization factor
127 ValueType normFactor = 1.0;
129 # include "buildNormFactor.H"
132 cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
135 cusp::blas::copy(r, p);
138 cusp::blas::copy(r, r_star);
140 ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
143 ValueType normR = cusp::blas::nrm2(r)/normFactor;
144 ValueType normR0 = normR; // Initial residual
145 solverPerf->iRes = normR0;
150 normR > (solverPerf->tol)
151 && count < (solverPerf->maxIter)
152 && normR/normR0 >= (solverPerf->relTol)
153 || count < solverPerf->minIter
157 cusp::multiply(M, p, Mp);
160 cusp::multiply(A, Mp, AMp);
162 // alpha = (r_j, r_star) / (A*M*p, r_star)
163 ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
165 // s_j = r_j - alpha * AMp
166 cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
168 ValueType normS = cusp::blas::nrm2(s)/normFactor;
173 normS > (solverPerf->tol)
174 && normS/normR0 >= (solverPerf->relTol)
180 cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
183 cusp::multiply(A, X, y);
186 cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
187 // not sure we should be measuring r here...need to look again.
188 normR = cusp::blas::nrm2(r)/normFactor;
196 cusp::multiply(M, s, Ms);
199 cusp::multiply(A, Ms, AMs);
201 // omega = (AMs, s) / (AMs, AMs)
202 ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
204 // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
205 cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
207 // r_{j+1} = s_j - omega*A*M*s
208 cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
210 // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
211 ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
212 ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
213 r_r_star_old = r_r_star_new;
215 // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
216 cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
218 // not dure we should be measuring r here...need to look again.
219 normR = cusp::blas::nrm2(r)/normFactor;
226 solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
227 solverPerf->nIterations = count;
232 solverPerf->fRes<=solverPerf->tol
233 || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
236 solverPerf->converged = true;
240 solverPerf->converged = false;
243 // Pass the solution vector back