1 SUBROUTINE PCGNS
(NN
,AA
,LENAA
,MAXA
,PP
,RHO
,START
,WORK
,IFLAG
)
3 C This subroutine solves a system of equations using the method
4 C of Conjugate Gradients.
6 C The system to be solved is in the form Bx=b, where
9 C | | | | | T = START(k), where
10 C | AA | -PP | | -RHO |
11 C B = | | | , b = | | |START(k)|= max |START(i)|
12 C +--------+-----+ +------+ 1<=i<=NN+1
16 C AA is an (NN x NN) symmetric matrix, PP, RHO are (NN x 1) vectors,
17 C b is of length NN+1 and E(k)**t is the ( 1 x (NN+1) ) vector
18 C consisting of all zeros, except for a '1' in its k-th position.
19 C It is assumed that rank [AA,-PP]=NN and B is invertible.
21 C The system is solved by splitting B into two matrices M and L, where
25 C | AA | c | | -PP-c |
26 C M = | | | , L = u * [E(NN+1)**t], u = | | ,
27 C +------+---+ +-------+
31 C E(NN+1) is the (NN+1) x 1 vector consisting of all zeros except for
32 C a '1' in its last position, and x**t is the transpose of x.
34 C The final solution vector, x, is given by
37 C | [sol(u)]*[E(NN+1)**t] |
38 C x = | I - ----------------------------- | * sol(b)
39 C | {[(sol(u))**t]*E(NN+1)}+1.0 |
42 C where sol(a)=[M**(-1)]*a. The two systems (Mz=u, Mz=b) are solved
43 C by a preconditioned conjugate gradient algorithm.
49 C NN -- dimension of the matrix packed in AA.
51 C AA -- one dimensional real array containing the leading NN x NN
52 C submatrix of B in packed skyline storage form.
54 C LENAA -- number of elements in the packed array AA.
56 C MAXA -- integer array used for specifying information about AA.
57 C Using packed skyline storage, it has length NN+2, and
58 C stores the indices of the diagonal elements within AA.
59 C MAXA(NN+1) = LENAA + 1 and MAXA(NN+2) = LENAA + NN + 3 - k
60 C (k as defined above) by convention.
61 C (NOTE: The value of MAXA(NN+2) is set by this
62 C subroutine when the preconditioning matrix Q is
65 C For example, using the packed storage scheme,
66 C a symmetric 5 x 5 matrix of the form
76 C would result in NN=5, LENAA=9, MAXA=(1,2,4,5,8,10,*),
77 C and AA=(1,2,3,4,5,6,7,8,9).
79 C PP -- vector of length NN, used for (NN+1)st column of
80 C augmented matrix B .
82 C RHO -- vector of length NN, negative of top part of right hand
85 C START -- vector of length NN+1, normally the solution to the
86 C previous linear system; used to determine the index k .
90 C START -- solution vector x of B x = b (defined above).
92 C IFLAG -- normally unchanged on output. If the conjugate gradient
93 C iteration fails to converge in 10*(NN+1) iterations (most
94 C likely due to a singular Jacobian matrix), PCGNS returns
95 C with IFLAG = 4 , and does not compute x.
99 C WORK -- array of length 6*(NN+1) + LENAA :
101 C WORK(1..NN+1) = temporary working storage;
103 C WORK(NN+2..2NN+2) = intermediate solution vector z for Mz=u,
104 C input value is used as initial estimate for z;
106 C WORK(2NN+3..3NN+3) = intermediate solution vector z for Mz=b,
107 C input value is used as initial estimate for z;
109 C WORK(3NN+4..4NN+4) = storage for residual vectors;
111 C WORK(4NN+5..5NN+5) = storage for direction vectors;
113 C WORK(5NN+6.. * ) = storage for the preconditioning matrix
114 C Q, normally of length LENAA+NN+1. A storage scheme for Q
115 C (and AA) other than the default packed skyline storage
116 C scheme can be accomodated by simply extending the length
117 C of WORK (and MAXA), and prodiving different versions of
118 C the subroutines MULTDS, MFACDS, and QIMUDS.
121 C Three user-defined subroutines are required:
123 C MULTDS(y,AA,x,MAXA,NN,LENAA) -- computes y = AA x .
125 C MFACDS(NN,Q,LENAA,MAXA) -- computes the preconditioning matrix
126 C Q based on M. A copy of AA is placed in Q before the call;
127 C after the call, it is assumed that Q contains some factorization
128 C for the preconditioning matrix Q. If no preconditioning is
129 C required, MFACDS may be a dummy subroutine.
131 C QIMUDS(Q,f,MAXA,NN,LENAA) -- computes f := [Q**(-1)]*f for any
132 C vector f, given the factorization of Q produced by subroutine
133 C MFACDS. Again, if no preconditioning is required, QIMUDS
134 C may be a dummy subroutine.
137 C Subroutines and functions called:
139 C BLAS -- DAXPY, DCOPY, DDOT, DNRM2, DSCAL, IDAMAX
140 C D1MACH,MULTDS,MFACDS,QIMUDS
143 INTEGER IFLAG
,IMAX
,IND
,J
,K
,LENAA
,NN
,MAXA
(NN
+2),NP1
,NP2
,N2P3
,
145 DOUBLE PRECISION AA
(LENAA
),AB
,AU
,BB
,BU
,DZNRM
,PBNPRD
,PP
(NN
),
146 & PUNPRD
,RBNPRD
,RBTOL
,RHO
(NN
),RNPRD
,RUNPRD
,RUTOL
,
147 & START
(NN
+1),STARTK
,TEMP
,UNRM
,WORK
(5*(NN
+1)+LENAA
+NN
+1),
149 LOGICAL STILLU
,STILLB
151 DOUBLE PRECISION D1MACH
,DDOT
,DNRM2
155 C SET UP BASES FOR VECTORS STORED IN WORK ARRAY.
164 C FIND THE ELEMENT OF LARGEST MAGNITUDE IN THE INITIAL VECTOR, AND
165 C RECORD ITS POSITION IN K.
167 K
=IDAMAX
(NP1
,START
,1)
170 C INITIALIZE Q, SET VALUES OF MAXA(NN+1) AND MAXA(NN+2),
171 C COMPUTE PRECONDITIONER.
173 CALL DCOPY
(LENAA
,AA
,1,WORK
(N5P6
),1)
175 MAXA
(NN
+2)=LENAA
+NN
+3-K
176 CALL MFACDS
(NN
,WORK
(N5P6
),LENAA
,MAXA
)
178 C COMPUTE ALL TOLERANCES NEEDED FOR EXIT CRITERIA.
180 CALL DCOPY
(NN
,PP
,1,WORK
,1)
181 IF (K
.LT
. NP1
) WORK
(K
)=WORK
(K
)+1.0D0
182 UNRM
=DNRM2
(NN
,WORK
,1)
188 RBTOL
=ZTOL*SQRT
(STARTK**2
+ DNRM2
(NN
,RHO
,1)**2)
191 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = u .
193 CALL MULTDS
(WORK
(N3P4
),AA
,WORK
(NP2
),MAXA
,NN
,LENAA
)
194 WORK
(N3P4
+NN
)=WORK
(NP2
+K
-1)
196 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(NP2
+NN
)
197 CALL DSCAL
(NP1
,-1.0D0
,WORK
(N3P4
),1)
198 CALL DAXPY
(NN
,-1.0D0
,PP
,1,WORK
(N3P4
),1)
199 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)-1.0D0
200 CALL QIMUDS
(WORK
(N5P6
),WORK
(N3P4
),MAXA
,NN
,LENAA
)
202 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = u .
204 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
205 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
206 CALL MULTDS
(WORK
(N4P5
),AA
,WORK
,MAXA
,NN
,LENAA
)
207 WORK
(N4P5
+NN
)=WORK
(K
)
208 IF (K
.LT
. NP1
) WORK
(N4P5
+K
-1)=WORK
(N4P5
+K
-1)+WORK
(NP1
)
210 RUNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
211 PUNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
215 C DO WHILE ((STILLU) .AND. (J .LE. IMAX))
216 100 IF (.NOT
. ((STILLU
) .AND
. (J
.LE
. IMAX
)) ) GO TO 200
218 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
219 IF (SQRT
(RUNPRD
) .GT
. RUTOL
) THEN
220 IF (PUNPRD
.EQ
. 0.0) THEN
221 CALL MULTDS
(WORK
(N3P4
),AA
,WORK
(NP2
),MAXA
,NN
,LENAA
)
222 WORK
(N3P4
+NN
)=WORK
(NP2
+K
-1)
224 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(NP2
+NN
)
225 CALL DSCAL
(NP1
,-1.0D0
,WORK
(N3P4
),1)
226 CALL DAXPY
(NN
,-1.0D0
,PP
,1,WORK
(N3P4
),1)
227 IF (K
.LT
. NP1
) WORK
(N3P4
+K
-1)=WORK
(N3P4
+K
-1)-1.0D0
228 CALL QIMUDS
(WORK
(N5P6
),WORK
(N3P4
),MAXA
,NN
,LENAA
)
229 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
230 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
231 CALL MULTDS
(WORK
(N4P5
),AA
,WORK
,MAXA
,NN
,LENAA
)
232 WORK
(N4P5
+NN
)=WORK
(K
)
234 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(NP1
)
235 RUNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
236 PUNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
237 IF (SQRT
(RUNPRD
) .LE
. RUTOL
) THEN
242 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED||.
244 CALL DCOPY
(NP1
,WORK
(NP2
),1,WORK
,1)
245 CALL DAXPY
(NP1
,AU
,WORK
(N4P5
),1,WORK
(NP2
),1)
246 CALL DAXPY
(NP1
,-1.0D0
,WORK
(NP2
),1,WORK
,1)
247 ZLEN
=DNRM2
(NP1
,WORK
(NP2
),1)
248 DZNRM
=DNRM2
(NP1
,WORK
,1)
249 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
250 IF ( (DZNRM
/ZLEN
) .LT
. ZTOL
) STILLU
=.FALSE
.
256 C IF NO EXIT CRITERIA FOR Mz=u HAVE BEEN MET, CONTINUE.
258 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
259 CALL MULTDS
(WORK
,AA
,WORK
(N4P5
),MAXA
,NN
,LENAA
)
260 WORK
(NP1
)=WORK
(N4P5
+K
-1)
261 IF (K
.LT
. NP1
) WORK
(K
)=WORK
(K
)+WORK
(N4P5
+NN
)
262 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
263 CALL DAXPY
(NP1
,-AU
,WORK
,1,WORK
(N3P4
),1)
264 RNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
265 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
268 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
269 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
270 CALL MULTDS
(START
,AA
,WORK
,MAXA
,NN
,LENAA
)
272 IF (K
.LT
. NP1
) START
(K
)=START
(K
)+WORK
(NP1
)
273 CALL DAXPY
(NP1
,BU
,WORK
(N4P5
),1,START
,1)
274 CALL DCOPY
(NP1
,START
,1,WORK
(N4P5
),1)
275 PUNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
283 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
285 IF (J
.GT
. IMAX
) THEN
290 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = b .
292 CALL MULTDS
(WORK
(N3P4
),AA
,WORK
(N2P3
),MAXA
,NN
,LENAA
)
293 WORK
(N3P4
+NN
)=WORK
(N2P3
+K
-1)
295 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(N2P3
+NN
)
296 CALL DSCAL
(NP1
,-1.0D0
,WORK
(N3P4
),1)
297 CALL DAXPY
(NN
,-1.0D0
,RHO
,1,WORK
(N3P4
),1)
298 WORK
(N3P4
+NN
)=STARTK
+WORK
(N3P4
+NN
)
299 CALL QIMUDS
(WORK
(N5P6
),WORK
(N3P4
),MAXA
,NN
,LENAA
)
301 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = b .
303 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
304 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
305 CALL MULTDS
(WORK
(N4P5
),AA
,WORK
,MAXA
,NN
,LENAA
)
306 WORK
(N4P5
+NN
)=WORK
(K
)
307 IF (K
.LT
. NP1
) WORK
(N4P5
+K
-1)=WORK
(N4P5
+K
-1)+WORK
(NP1
)
309 RBNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
310 PBNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
314 C DO WHILE ( STILLB .AND. (J .LE. IMAX) )
315 300 IF (.NOT
. ( STILLB
.AND
. (J
.LE
. IMAX
) ) ) GO TO 400
317 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
318 IF (SQRT
(RBNPRD
) .GT
. RBTOL
) THEN
319 IF (PBNPRD
.EQ
. 0.0) THEN
320 CALL MULTDS
(WORK
(N3P4
),AA
,WORK
(N2P3
),MAXA
,NN
,LENAA
)
321 WORK
(N3P4
+NN
)=WORK
(N2P3
+K
-1)
323 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(N2P3
+NN
)
324 CALL DSCAL
(NP1
,-1.0D0
,WORK
(N3P4
),1)
325 CALL DAXPY
(NN
,-1.0D0
,RHO
,1,WORK
(N3P4
),1)
326 WORK
(N3P4
+NN
)=STARTK
+WORK
(N3P4
+NN
)
327 CALL QIMUDS
(WORK
(N5P6
),WORK
(N3P4
),MAXA
,NN
,LENAA
)
328 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
329 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
330 CALL MULTDS
(WORK
(N4P5
),AA
,WORK
,MAXA
,NN
,LENAA
)
331 WORK
(N4P5
+NN
)=WORK
(K
)
333 IF (K
.LT
. NP1
) WORK
(IND
)=WORK
(IND
)+WORK
(NP1
)
334 RBNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
335 PBNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
336 IF (SQRT
(RBNPRD
) .LE
. RBTOL
) THEN
341 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED|| .
343 CALL DCOPY
(NP1
,WORK
(N2P3
),1,WORK
,1)
344 CALL DAXPY
(NP1
,AB
,WORK
(N4P5
),1,WORK
(N2P3
),1)
345 CALL DAXPY
(NP1
,-1.0D0
,WORK
(N2P3
),1,WORK
,1)
346 ZLEN
=DNRM2
(NP1
,WORK
(N2P3
),1)
347 DZNRM
=DNRM2
(NP1
,WORK
,1)
348 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
349 IF ( (DZNRM
/ZLEN
) .LT
. ZTOL
) STILLB
=.FALSE
.
355 C IF NO EXIT CRITERIA FOR Mz=b HAVE BEEN MET, CONTINUE.
357 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
358 CALL MULTDS
(WORK
,AA
,WORK
(N4P5
),MAXA
,NN
,LENAA
)
359 WORK
(NP1
)=WORK
(N4P5
+K
-1)
360 IF (K
.LT
. NP1
) WORK
(K
)=WORK
(K
)+WORK
(N4P5
+NN
)
361 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
362 CALL DAXPY
(NP1
,-AB
,WORK
,1,WORK
(N3P4
),1)
363 RNPRD
=DDOT
(NP1
,WORK
(N3P4
),1,WORK
(N3P4
),1)
364 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
367 CALL DCOPY
(NP1
,WORK
(N3P4
),1,WORK
,1)
368 CALL QIMUDS
(WORK
(N5P6
),WORK
,MAXA
,NN
,LENAA
)
369 CALL MULTDS
(START
,AA
,WORK
,MAXA
,NN
,LENAA
)
371 IF (K
.LT
. NP1
) START
(K
)=START
(K
)+WORK
(NP1
)
372 CALL DAXPY
(NP1
,BB
,WORK
(N4P5
),1,START
,1)
373 CALL DCOPY
(NP1
,START
,1,WORK
(N4P5
),1)
374 PBNPRD
=DDOT
(NP1
,WORK
(N4P5
),1,WORK
(N4P5
),1)
382 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
384 IF (J
.GT
. IMAX
) THEN
389 C COMPUTE FINAL SOLUTION VECTOR X, RETURN IT IN START.
391 TEMP
=-WORK
(N2P3
+NN
)/(1.0D0
+WORK
(NP2
+NN
))
392 CALL DCOPY
(NP1
,WORK
(N2P3
),1,START
,1)
393 CALL DAXPY
(NP1
,TEMP
,WORK
(NP2
),1,START
,1)