Support RETURN-FROM in DEF%TR forms
[maxima.git] / share / hompack / fortran / pcgns.f
blob4d47673af356a9221ebdd939cefb9f9649f4dc65
1 SUBROUTINE PCGNS(NN,AA,LENAA,MAXA,PP,RHO,START,WORK,IFLAG)
3 C This subroutine solves a system of equations using the method
4 C of Conjugate Gradients.
5 C
6 C The system to be solved is in the form Bx=b, where
8 C +-- --+ +- -+
9 C | | | | | T = START(k), where
10 C | AA | -PP | | -RHO |
11 C B = | | | , b = | | |START(k)|= max |START(i)|
12 C +--------+-----+ +------+ 1<=i<=NN+1
13 C | E(k)**t | | T |
14 C +-- --+ +- -+
16 C AA is an (NN x NN) symmetric matrix, PP, RHO are (NN x 1) vectors,
17 C b is of length NN+1 and E(k)**t is the ( 1 x (NN+1) ) vector
18 C consisting of all zeros, except for a '1' in its k-th position.
19 C It is assumed that rank [AA,-PP]=NN and B is invertible.
21 C The system is solved by splitting B into two matrices M and L, where
23 C +- -+ +- -+
24 C | | | | |
25 C | AA | c | | -PP-c |
26 C M = | | | , L = u * [E(NN+1)**t], u = | | ,
27 C +------+---+ +-------+
28 C | c | d | | 0 |
29 C +- -+ +- -+
31 C E(NN+1) is the (NN+1) x 1 vector consisting of all zeros except for
32 C a '1' in its last position, and x**t is the transpose of x.
34 C The final solution vector, x, is given by
36 C +- -+
37 C | [sol(u)]*[E(NN+1)**t] |
38 C x = | I - ----------------------------- | * sol(b)
39 C | {[(sol(u))**t]*E(NN+1)}+1.0 |
40 C +- -+
42 C where sol(a)=[M**(-1)]*a. The two systems (Mz=u, Mz=b) are solved
43 C by a preconditioned conjugate gradient algorithm.
47 C Input variables:
49 C NN -- dimension of the matrix packed in AA.
51 C AA -- one dimensional real array containing the leading NN x NN
52 C submatrix of B in packed skyline storage form.
54 C LENAA -- number of elements in the packed array AA.
56 C MAXA -- integer array used for specifying information about AA.
57 C Using packed skyline storage, it has length NN+2, and
58 C stores the indices of the diagonal elements within AA.
59 C MAXA(NN+1) = LENAA + 1 and MAXA(NN+2) = LENAA + NN + 3 - k
60 C (k as defined above) by convention.
61 C (NOTE: The value of MAXA(NN+2) is set by this
62 C subroutine when the preconditioning matrix Q is
63 C initialized.)
65 C For example, using the packed storage scheme,
66 C a symmetric 5 x 5 matrix of the form
68 C +-- --+
69 C | 1 3 0 0 0 |
70 C | 3 2 0 7 0 |
71 C | 0 0 4 6 0 |
72 C | 0 7 6 5 9 |
73 C | 0 0 0 9 8 |
74 C +-- --+
76 C would result in NN=5, LENAA=9, MAXA=(1,2,4,5,8,10,*),
77 C and AA=(1,2,3,4,5,6,7,8,9).
79 C PP -- vector of length NN, used for (NN+1)st column of
80 C augmented matrix B .
82 C RHO -- vector of length NN, negative of top part of right hand
83 C side b .
85 C START -- vector of length NN+1, normally the solution to the
86 C previous linear system; used to determine the index k .
88 C Output variables:
90 C START -- solution vector x of B x = b (defined above).
92 C IFLAG -- normally unchanged on output. If the conjugate gradient
93 C iteration fails to converge in 10*(NN+1) iterations (most
94 C likely due to a singular Jacobian matrix), PCGNS returns
95 C with IFLAG = 4 , and does not compute x.
97 C Working storage:
99 C WORK -- array of length 6*(NN+1) + LENAA :
101 C WORK(1..NN+1) = temporary working storage;
103 C WORK(NN+2..2NN+2) = intermediate solution vector z for Mz=u,
104 C input value is used as initial estimate for z;
106 C WORK(2NN+3..3NN+3) = intermediate solution vector z for Mz=b,
107 C input value is used as initial estimate for z;
109 C WORK(3NN+4..4NN+4) = storage for residual vectors;
111 C WORK(4NN+5..5NN+5) = storage for direction vectors;
113 C WORK(5NN+6.. * ) = storage for the preconditioning matrix
114 C Q, normally of length LENAA+NN+1. A storage scheme for Q
115 C (and AA) other than the default packed skyline storage
116 C scheme can be accomodated by simply extending the length
117 C of WORK (and MAXA), and prodiving different versions of
118 C the subroutines MULTDS, MFACDS, and QIMUDS.
121 C Three user-defined subroutines are required:
123 C MULTDS(y,AA,x,MAXA,NN,LENAA) -- computes y = AA x .
125 C MFACDS(NN,Q,LENAA,MAXA) -- computes the preconditioning matrix
126 C Q based on M. A copy of AA is placed in Q before the call;
127 C after the call, it is assumed that Q contains some factorization
128 C for the preconditioning matrix Q. If no preconditioning is
129 C required, MFACDS may be a dummy subroutine.
131 C QIMUDS(Q,f,MAXA,NN,LENAA) -- computes f := [Q**(-1)]*f for any
132 C vector f, given the factorization of Q produced by subroutine
133 C MFACDS. Again, if no preconditioning is required, QIMUDS
134 C may be a dummy subroutine.
137 C Subroutines and functions called:
139 C BLAS -- DAXPY, DCOPY, DDOT, DNRM2, DSCAL, IDAMAX
140 C D1MACH,MULTDS,MFACDS,QIMUDS
143 INTEGER IFLAG,IMAX,IND,J,K,LENAA,NN,MAXA(NN+2),NP1,NP2,N2P3,
144 & N3P4,N4P5,N5P6
145 DOUBLE PRECISION AA(LENAA),AB,AU,BB,BU,DZNRM,PBNPRD,PP(NN),
146 & PUNPRD,RBNPRD,RBTOL,RHO(NN),RNPRD,RUNPRD,RUTOL,
147 & START(NN+1),STARTK,TEMP,UNRM,WORK(5*(NN+1)+LENAA+NN+1),
148 & ZLEN,ZTOL
149 LOGICAL STILLU,STILLB
151 DOUBLE PRECISION D1MACH,DDOT,DNRM2
152 INTEGER IDAMAX
155 C SET UP BASES FOR VECTORS STORED IN WORK ARRAY.
157 NP1=NN+1
158 NP2=NN+2
159 N2P3=(2*NN)+3
160 N3P4=(3*NN)+4
161 N4P5=(4*NN)+5
162 N5P6=(5*NN)+6
164 C FIND THE ELEMENT OF LARGEST MAGNITUDE IN THE INITIAL VECTOR, AND
165 C RECORD ITS POSITION IN K.
167 K=IDAMAX(NP1,START,1)
168 STARTK=START(K)
170 C INITIALIZE Q, SET VALUES OF MAXA(NN+1) AND MAXA(NN+2),
171 C COMPUTE PRECONDITIONER.
173 CALL DCOPY(LENAA,AA,1,WORK(N5P6),1)
174 MAXA(NN+1)=LENAA+1
175 MAXA(NN+2)=LENAA+NN+3-K
176 CALL MFACDS(NN,WORK(N5P6),LENAA,MAXA)
178 C COMPUTE ALL TOLERANCES NEEDED FOR EXIT CRITERIA.
180 CALL DCOPY(NN,PP,1,WORK,1)
181 IF (K .LT. NP1) WORK(K)=WORK(K)+1.0D0
182 UNRM=DNRM2(NN,WORK,1)
184 IMAX=10*NP1
185 STILLU=.TRUE.
186 STILLB=.TRUE.
187 ZTOL=100.0*D1MACH(4)
188 RBTOL=ZTOL*SQRT(STARTK**2 + DNRM2(NN,RHO,1)**2)
189 RUTOL=ZTOL*UNRM
191 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = u .
193 CALL MULTDS(WORK(N3P4),AA,WORK(NP2),MAXA,NN,LENAA)
194 WORK(N3P4+NN)=WORK(NP2+K-1)
195 IND=N3P4+K-1
196 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP2+NN)
197 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
198 CALL DAXPY(NN,-1.0D0,PP,1,WORK(N3P4),1)
199 IF (K .LT. NP1) WORK(IND)=WORK(IND)-1.0D0
200 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
202 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = u .
204 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
205 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
206 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
207 WORK(N4P5+NN)=WORK(K)
208 IF (K .LT. NP1) WORK(N4P5+K-1)=WORK(N4P5+K-1)+WORK(NP1)
210 RUNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
211 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
215 C DO WHILE ((STILLU) .AND. (J .LE. IMAX))
216 100 IF (.NOT. ((STILLU) .AND. (J .LE. IMAX)) ) GO TO 200
218 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
219 IF (SQRT(RUNPRD) .GT. RUTOL) THEN
220 IF (PUNPRD .EQ. 0.0) THEN
221 CALL MULTDS(WORK(N3P4),AA,WORK(NP2),MAXA,NN,LENAA)
222 WORK(N3P4+NN)=WORK(NP2+K-1)
223 IND=N3P4+K-1
224 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP2+NN)
225 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
226 CALL DAXPY(NN,-1.0D0,PP,1,WORK(N3P4),1)
227 IF (K .LT. NP1) WORK(N3P4+K-1)=WORK(N3P4+K-1)-1.0D0
228 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
229 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
230 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
231 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
232 WORK(N4P5+NN)=WORK(K)
233 IND=N4P5+K-1
234 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP1)
235 RUNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
236 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
237 IF (SQRT(RUNPRD) .LE. RUTOL) THEN
238 STILLU=.FALSE.
239 ENDIF
240 ENDIF
241 IF (STILLU) THEN
242 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED||.
243 AU=RUNPRD/PUNPRD
244 CALL DCOPY(NP1,WORK(NP2),1,WORK,1)
245 CALL DAXPY(NP1,AU,WORK(N4P5),1,WORK(NP2),1)
246 CALL DAXPY(NP1,-1.0D0,WORK(NP2),1,WORK,1)
247 ZLEN=DNRM2(NP1,WORK(NP2),1)
248 DZNRM=DNRM2(NP1,WORK,1)
249 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
250 IF ( (DZNRM/ZLEN) .LT. ZTOL) STILLU=.FALSE.
251 ENDIF
252 ELSE
253 STILLU=.FALSE.
254 ENDIF
256 C IF NO EXIT CRITERIA FOR Mz=u HAVE BEEN MET, CONTINUE.
257 IF (STILLU) THEN
258 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
259 CALL MULTDS(WORK,AA,WORK(N4P5),MAXA,NN,LENAA)
260 WORK(NP1)=WORK(N4P5+K-1)
261 IF (K .LT. NP1) WORK(K)=WORK(K)+WORK(N4P5+NN)
262 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
263 CALL DAXPY(NP1,-AU,WORK,1,WORK(N3P4),1)
264 RNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
265 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
266 BU=RNPRD/RUNPRD
267 RUNPRD=RNPRD
268 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
269 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
270 CALL MULTDS(START,AA,WORK,MAXA,NN,LENAA)
271 START(NP1)=WORK(K)
272 IF (K .LT. NP1) START(K)=START(K)+WORK(NP1)
273 CALL DAXPY(NP1,BU,WORK(N4P5),1,START,1)
274 CALL DCOPY(NP1,START,1,WORK(N4P5),1)
275 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
276 ENDIF
278 J=J+1
279 GO TO 100
280 200 CONTINUE
281 C END DO
283 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
285 IF (J .GT. IMAX) THEN
286 IFLAG=4
287 RETURN
288 ENDIF
290 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = b .
292 CALL MULTDS(WORK(N3P4),AA,WORK(N2P3),MAXA,NN,LENAA)
293 WORK(N3P4+NN)=WORK(N2P3+K-1)
294 IND=N3P4+K-1
295 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(N2P3+NN)
296 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
297 CALL DAXPY(NN,-1.0D0,RHO,1,WORK(N3P4),1)
298 WORK(N3P4+NN)=STARTK+WORK(N3P4+NN)
299 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
301 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = b .
303 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
304 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
305 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
306 WORK(N4P5+NN)=WORK(K)
307 IF (K .LT. NP1) WORK(N4P5+K-1)=WORK(N4P5+K-1)+WORK(NP1)
309 RBNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
310 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
314 C DO WHILE ( STILLB .AND. (J .LE. IMAX) )
315 300 IF (.NOT. ( STILLB .AND. (J .LE. IMAX) ) ) GO TO 400
317 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
318 IF (SQRT(RBNPRD) .GT. RBTOL) THEN
319 IF (PBNPRD .EQ. 0.0) THEN
320 CALL MULTDS(WORK(N3P4),AA,WORK(N2P3),MAXA,NN,LENAA)
321 WORK(N3P4+NN)=WORK(N2P3+K-1)
322 IND=N3P4+K-1
323 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(N2P3+NN)
324 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
325 CALL DAXPY(NN,-1.0D0,RHO,1,WORK(N3P4),1)
326 WORK(N3P4+NN)=STARTK+WORK(N3P4+NN)
327 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
328 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
329 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
330 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
331 WORK(N4P5+NN)=WORK(K)
332 IND=N4P5+K-1
333 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP1)
334 RBNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
335 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
336 IF (SQRT(RBNPRD) .LE. RBTOL) THEN
337 STILLB=.FALSE.
338 ENDIF
339 ENDIF
340 IF (STILLB) THEN
341 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED|| .
342 AB=RBNPRD/PBNPRD
343 CALL DCOPY(NP1,WORK(N2P3),1,WORK,1)
344 CALL DAXPY(NP1,AB,WORK(N4P5),1,WORK(N2P3),1)
345 CALL DAXPY(NP1,-1.0D0,WORK(N2P3),1,WORK,1)
346 ZLEN=DNRM2(NP1,WORK(N2P3),1)
347 DZNRM=DNRM2(NP1,WORK,1)
348 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
349 IF ( (DZNRM/ZLEN) .LT. ZTOL) STILLB=.FALSE.
350 ENDIF
351 ELSE
352 STILLB=.FALSE.
353 ENDIF
355 C IF NO EXIT CRITERIA FOR Mz=b HAVE BEEN MET, CONTINUE.
356 IF (STILLB) THEN
357 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
358 CALL MULTDS(WORK,AA,WORK(N4P5),MAXA,NN,LENAA)
359 WORK(NP1)=WORK(N4P5+K-1)
360 IF (K .LT. NP1) WORK(K)=WORK(K)+WORK(N4P5+NN)
361 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
362 CALL DAXPY(NP1,-AB,WORK,1,WORK(N3P4),1)
363 RNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
364 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
365 BB=RNPRD/RBNPRD
366 RBNPRD=RNPRD
367 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
368 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
369 CALL MULTDS(START,AA,WORK,MAXA,NN,LENAA)
370 START(NP1)=WORK(K)
371 IF (K .LT. NP1) START(K)=START(K)+WORK(NP1)
372 CALL DAXPY(NP1,BB,WORK(N4P5),1,START,1)
373 CALL DCOPY(NP1,START,1,WORK(N4P5),1)
374 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
375 ENDIF
377 J=J+1
378 GO TO 300
379 400 CONTINUE
380 C END DO
382 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
384 IF (J .GT. IMAX) THEN
385 IFLAG=4
386 RETURN
387 ENDIF
389 C COMPUTE FINAL SOLUTION VECTOR X, RETURN IT IN START.
391 TEMP=-WORK(N2P3+NN)/(1.0D0+WORK(NP2+NN))
392 CALL DCOPY(NP1,WORK(N2P3),1,START,1)
393 CALL DAXPY(NP1,TEMP,WORK(NP2),1,START,1)
395 RETURN