Support RETURN-FROM in DEF%TR forms
[maxima.git] / share / hompack / fortran / pcgds.f
blobab006f6eafb467be39aedd1567077ddcb9bbca9f
1 SUBROUTINE PCGDS(NN,AA,LENAA,MAXA,PP,START,WORK,IFLAG)
3 C This subroutine solves a system of equations using the method
4 C of Conjugate Gradients.
5 C
6 C The system to be solved is in the form Bx=b, where
8 C +-- --+ +- -+
9 C | | | | 0 | T = START(k), where
10 C | AA | -PP | | ... |
11 C B = | | | , b = | 0 | |START(k)|= max |START(i)|
12 C +--------+-----+ +-----+ 1<=i<=NN+1
13 C | E(k)**t | | T |
14 C +-- --+ +- -+
16 C AA is an (NN x NN) symmetric matrix, PP is an (NN x 1) vector,
17 C b is of length NN+1 and E(k)**t is the ( 1 x (NN+1) ) vector
18 C consisting of all zeros, except for a '1' in its k-th position.
19 C It is assumed that rank [AA,-PP]=NN and B is invertible.
21 C The system is solved by splitting B into two matrices M and L, where
23 C +- -+ +- -+
24 C | | | | |
25 C | AA | c | | -PP-c |
26 C M = | | | , L = u * [E(NN+1)**t], u = | | ,
27 C +------+---+ +-------+
28 C | c | d | | 0 |
29 C +- -+ +- -+
31 C E(NN+1) is the (NN+1) x 1 vector consisting of all zeros except for
32 C a '1' in its last position, and x**t is the transpose of x.
34 C The final solution vector, x, is given by
36 C +- -+
37 C | [sol(u)]*[E(NN+1)**t] |
38 C x = | I - ----------------------------- | * sol(b)
39 C | {[(sol(u))**t]*E(NN+1)}+1.0 |
40 C +- -+
42 C where sol(a)=[M**(-1)]*a. The two systems (Mz=u, Mz=b) are solved
43 C by a preconditioned conjugate gradient algorithm.
47 C Input variables:
49 C NN -- dimension of the matrix packed in AA.
51 C AA -- one dimensional real array containing the leading NN x NN
52 C submatrix of B in packed skyline storage form.
54 C LENAA -- number of elements in the packed array AA.
56 C MAXA -- integer array used for specifying information about AA.
57 C Using packed skyline storage, it has length NN+2, and
58 C stores the indices of the diagonal elements within AA.
59 C MAXA(NN+1) = LENAA + 1 and MAXA(NN+2) = LENAA + NN + 3 - k
60 C (k as defined above) by convention.
61 C (NOTE: The value of MAXA(NN+2) is set by this
62 C subroutine when the preconditioning matrix Q is
63 C initialized.)
65 C For example, using the packed storage scheme,
66 C a symmetric 5 x 5 matrix of the form
68 C +-- --+
69 C | 1 3 0 0 0 |
70 C | 3 2 0 7 0 |
71 C | 0 0 4 6 0 |
72 C | 0 7 6 5 9 |
73 C | 0 0 0 9 8 |
74 C +-- --+
76 C would result in NN=5, LENAA=9, MAXA=(1,2,4,5,8,10,*),
77 C and AA=(1,2,3,4,5,6,7,8,9).
79 C PP -- vector of length NN, used for (NN+1)st column of
80 C augmented matrix B .
82 C START -- vector of length NN+1, normally the solution to the
83 C previous linear system; used to determine the index k .
85 C Output variables:
87 C START -- solution vector x of B x = b (defined above).
89 C IFLAG -- normally unchanged on output. If the conjugate gradient
90 C iteration fails to converge in 10*(NN+1) iterations (most
91 C likely due to a singular Jacobian matrix), PCGDS returns
92 C with IFLAG = 4 , and does not compute x.
94 C Working storage:
96 C WORK -- array of length 6*(NN+1) + LENAA :
98 C WORK(1..NN+1) = temporary working storage;
100 C WORK(NN+2..2NN+2) = intermediate solution vector z for Mz=u,
101 C input value is used as initial estimate for z;
103 C WORK(2NN+3..3NN+3) = intermediate solution vector z for Mz=b,
104 C input value is used as initial estimate for z;
106 C WORK(3NN+4..4NN+4) = storage for residual vectors;
108 C WORK(4NN+5..5NN+5) = storage for direction vectors;
110 C WORK(5NN+6.. * ) = storage for the preconditioning matrix
111 C Q, normally of length LENAA+NN+1. A storage scheme for Q
112 C (and AA) other than the default packed skyline storage
113 C scheme can be accomodated by simply extending the length
114 C of WORK (and MAXA), and prodiving different versions of
115 C the subroutines MULTDS, MFACDS, and QIMUDS.
118 C Three user-defined subroutines are required:
120 C MULTDS(y,AA,x,MAXA,NN,LENAA) -- computes y = AA x .
122 C MFACDS(NN,Q,LENAA,MAXA) -- computes the preconditioning matrix
123 C Q based on M. A copy of AA is placed in Q before the call;
124 C after the call, it is assumed that Q contains some factorization
125 C for the preconditioning matrix Q. If no preconditioning is
126 C required, MFACDS may be a dummy subroutine.
128 C QIMUDS(Q,f,MAXA,NN,LENAA) -- computes f := [Q**(-1)]*f for any
129 C vector f, given the factorization of Q produced by subroutine
130 C MFACDS. Again, if no preconditioning is required, QIMUDS
131 C may be a dummy subroutine.
134 C Subroutines and functions called:
136 C BLAS -- DAXPY, DCOPY, DDOT, DNRM2, DSCAL, IDAMAX
137 C D1MACH,MULTDS,MFACDS,QIMUDS
140 INTEGER IFLAG,IMAX,IND,J,K,LENAA,NN,MAXA(NN+2),NP1,NP2,N2P3,
141 & N3P4,N4P5,N5P6
142 DOUBLE PRECISION AA(LENAA),AB,AU,BB,BU,DZNRM,PBNPRD,PP(NN),
143 & PUNPRD,RBNPRD,RBTOL,RNPRD,RUNPRD,RUTOL,START(NN+1),
144 & STARTK,TEMP,UNRM,WORK(5*(NN+1)+LENAA+NN+1),
145 & ZLEN,ZTOL
146 LOGICAL STILLU,STILLB
148 DOUBLE PRECISION D1MACH,DDOT,DNRM2
149 INTEGER IDAMAX
152 C SET UP BASES FOR VECTORS STORED IN WORK ARRAY.
154 NP1=NN+1
155 NP2=NN+2
156 N2P3=(2*NN)+3
157 N3P4=(3*NN)+4
158 N4P5=(4*NN)+5
159 N5P6=(5*NN)+6
161 C FIND THE ELEMENT OF LARGEST MAGNITUDE IN THE INITIAL VECTOR, AND
162 C RECORD ITS POSITION IN K.
164 K=IDAMAX(NP1,START,1)
165 STARTK=START(K)
167 C INITIALIZE Q, SET VALUES OF MAXA(NN+1) AND MAXA(NN+2),
168 C COMPUTE PRECONDITIONER.
170 CALL DCOPY(LENAA,AA,1,WORK(N5P6),1)
171 MAXA(NN+1)=LENAA+1
172 MAXA(NN+2)=LENAA+NN+3-K
173 CALL MFACDS(NN,WORK(N5P6),LENAA,MAXA)
175 C COMPUTE ALL TOLERANCES NEEDED FOR EXIT CRITERIA.
177 CALL DCOPY(NN,PP,1,WORK,1)
178 IF (K .LT. NP1) WORK(K)=WORK(K)+1.0D0
179 UNRM=DNRM2(NN,WORK,1)
181 IMAX=10*NP1
182 STILLU=.TRUE.
183 STILLB=.TRUE.
184 ZTOL=100.0*D1MACH(4)
185 RBTOL=ZTOL*ABS(STARTK)
186 RUTOL=ZTOL*UNRM
188 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = u .
190 CALL MULTDS(WORK(N3P4),AA,WORK(NP2),MAXA,NN,LENAA)
191 WORK(N3P4+NN)=WORK(NP2+K-1)
192 IND=N3P4+K-1
193 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP2+NN)
194 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
195 CALL DAXPY(NN,-1.0D0,PP,1,WORK(N3P4),1)
196 IF (K .LT. NP1) WORK(IND)=WORK(IND)-1.0D0
197 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
199 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = u .
201 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
202 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
203 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
204 WORK(N4P5+NN)=WORK(K)
205 IF (K .LT. NP1) WORK(N4P5+K-1)=WORK(N4P5+K-1)+WORK(NP1)
207 RUNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
208 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
212 C DO WHILE ((STILLU) .AND. (J .LE. IMAX))
213 100 IF (.NOT. ((STILLU) .AND. (J .LE. IMAX)) ) GO TO 200
215 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
216 IF (SQRT(RUNPRD) .GT. RUTOL) THEN
217 IF (PUNPRD .EQ. 0.0) THEN
218 CALL MULTDS(WORK(N3P4),AA,WORK(NP2),MAXA,NN,LENAA)
219 WORK(N3P4+NN)=WORK(NP2+K-1)
220 IND=N3P4+K-1
221 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP2+NN)
222 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
223 CALL DAXPY(NN,-1.0D0,PP,1,WORK(N3P4),1)
224 IF (K .LT. NP1) WORK(N3P4+K-1)=WORK(N3P4+K-1)-1.0D0
225 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
226 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
227 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
228 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
229 WORK(N4P5+NN)=WORK(K)
230 IND=N4P5+K-1
231 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP1)
232 RUNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
233 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
234 IF (SQRT(RUNPRD) .LE. RUTOL) THEN
235 STILLU=.FALSE.
236 ENDIF
237 ENDIF
238 IF (STILLU) THEN
239 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED||.
240 AU=RUNPRD/PUNPRD
241 CALL DCOPY(NP1,WORK(NP2),1,WORK,1)
242 CALL DAXPY(NP1,AU,WORK(N4P5),1,WORK(NP2),1)
243 CALL DAXPY(NP1,-1.0D0,WORK(NP2),1,WORK,1)
244 ZLEN=DNRM2(NP1,WORK(NP2),1)
245 DZNRM=DNRM2(NP1,WORK,1)
246 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
247 IF ( (DZNRM/ZLEN) .LT. ZTOL) STILLU=.FALSE.
248 ENDIF
249 ELSE
250 STILLU=.FALSE.
251 ENDIF
253 C IF NO EXIT CRITERIA FOR Mz=u HAVE BEEN MET, CONTINUE.
254 IF (STILLU) THEN
255 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
256 CALL MULTDS(WORK,AA,WORK(N4P5),MAXA,NN,LENAA)
257 WORK(NP1)=WORK(N4P5+K-1)
258 IF (K .LT. NP1) WORK(K)=WORK(K)+WORK(N4P5+NN)
259 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
260 CALL DAXPY(NP1,-AU,WORK,1,WORK(N3P4),1)
261 RNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
262 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
263 BU=RNPRD/RUNPRD
264 RUNPRD=RNPRD
265 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
266 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
267 CALL MULTDS(START,AA,WORK,MAXA,NN,LENAA)
268 START(NP1)=WORK(K)
269 IF (K .LT. NP1) START(K)=START(K)+WORK(NP1)
270 CALL DAXPY(NP1,BU,WORK(N4P5),1,START,1)
271 CALL DCOPY(NP1,START,1,WORK(N4P5),1)
272 PUNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
273 ENDIF
275 J=J+1
276 GO TO 100
277 200 CONTINUE
278 C END DO
280 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
282 IF (J .GT. IMAX) THEN
283 IFLAG=4
284 RETURN
285 ENDIF
287 C COMPUTE INITIAL RESIDUAL VECTOR FOR THE SYSTEM M z = b .
289 CALL MULTDS(WORK(N3P4),AA,WORK(N2P3),MAXA,NN,LENAA)
290 WORK(N3P4+NN)=WORK(N2P3+K-1)
291 IND=N3P4+K-1
292 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(N2P3+NN)
293 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
294 WORK(N3P4+NN)=STARTK+WORK(N3P4+NN)
295 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
297 C COMPUTE INITIAL DIRECTION VECTOR, ALL INNER PRODUCTS FOR M z = b .
299 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
300 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
301 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
302 WORK(N4P5+NN)=WORK(K)
303 IF (K .LT. NP1) WORK(N4P5+K-1)=WORK(N4P5+K-1)+WORK(NP1)
305 RBNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
306 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
310 C DO WHILE ( STILLB .AND. (J .LE. IMAX) )
311 300 IF (.NOT. ( STILLB .AND. (J .LE. IMAX) ) ) GO TO 400
313 C IF ||RESIDUAL|| IS STILL NOT SMALL ENOUGH, CONTINUE.
314 IF (SQRT(RBNPRD) .GT. RBTOL) THEN
315 IF (PBNPRD .EQ. 0.0) THEN
316 CALL MULTDS(WORK(N3P4),AA,WORK(N2P3),MAXA,NN,LENAA)
317 WORK(N3P4+NN)=WORK(N2P3+K-1)
318 IND=N3P4+K-1
319 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(N2P3+NN)
320 CALL DSCAL(NP1,-1.0D0,WORK(N3P4),1)
321 WORK(N3P4+NN)=STARTK+WORK(N3P4+NN)
322 CALL QIMUDS(WORK(N5P6),WORK(N3P4),MAXA,NN,LENAA)
323 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
324 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
325 CALL MULTDS(WORK(N4P5),AA,WORK,MAXA,NN,LENAA)
326 WORK(N4P5+NN)=WORK(K)
327 IND=N4P5+K-1
328 IF (K .LT. NP1) WORK(IND)=WORK(IND)+WORK(NP1)
329 RBNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
330 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
331 IF (SQRT(RBNPRD) .LE. RBTOL) THEN
332 STILLB=.FALSE.
333 ENDIF
334 ENDIF
335 IF (STILLB) THEN
336 C UPDATE SOLUTION VECTOR; COMPUTE ||DELTA SOL||/||UPDATED|| .
337 AB=RBNPRD/PBNPRD
338 CALL DCOPY(NP1,WORK(N2P3),1,WORK,1)
339 CALL DAXPY(NP1,AB,WORK(N4P5),1,WORK(N2P3),1)
340 CALL DAXPY(NP1,-1.0D0,WORK(N2P3),1,WORK,1)
341 ZLEN=DNRM2(NP1,WORK(N2P3),1)
342 DZNRM=DNRM2(NP1,WORK,1)
343 C IF RELATIVE CHANGE IN SOLUTIONS IS SMALL ENOUGH, EXIT.
344 IF ( (DZNRM/ZLEN) .LT. ZTOL) STILLB=.FALSE.
345 ENDIF
346 ELSE
347 STILLB=.FALSE.
348 ENDIF
350 C IF NO EXIT CRITERIA FOR Mz=b HAVE BEEN MET, CONTINUE.
351 IF (STILLB) THEN
352 C UPDATE RESIDUAL VECTOR; COMPUTE (RNEW,RNEW), ||RNEW|| .
353 CALL MULTDS(WORK,AA,WORK(N4P5),MAXA,NN,LENAA)
354 WORK(NP1)=WORK(N4P5+K-1)
355 IF (K .LT. NP1) WORK(K)=WORK(K)+WORK(N4P5+NN)
356 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
357 CALL DAXPY(NP1,-AB,WORK,1,WORK(N3P4),1)
358 RNPRD=DDOT(NP1,WORK(N3P4),1,WORK(N3P4),1)
359 C UPDATE DIRECTION VECTOR; COMPUTE (PNEW,PNEW).
360 BB=RNPRD/RBNPRD
361 RBNPRD=RNPRD
362 CALL DCOPY(NP1,WORK(N3P4),1,WORK,1)
363 CALL QIMUDS(WORK(N5P6),WORK,MAXA,NN,LENAA)
364 CALL MULTDS(START,AA,WORK,MAXA,NN,LENAA)
365 START(NP1)=WORK(K)
366 IF (K .LT. NP1) START(K)=START(K)+WORK(NP1)
367 CALL DAXPY(NP1,BB,WORK(N4P5),1,START,1)
368 CALL DCOPY(NP1,START,1,WORK(N4P5),1)
369 PBNPRD=DDOT(NP1,WORK(N4P5),1,WORK(N4P5),1)
370 ENDIF
372 J=J+1
373 GO TO 300
374 400 CONTINUE
375 C END DO
377 C SET ERROR FLAG IF THE CONJUGATE GRADIENT ITERATION DID NOT CONVERGE.
379 IF (J .GT. IMAX) THEN
380 IFLAG=4
381 RETURN
382 ENDIF
384 C COMPUTE FINAL SOLUTION VECTOR X, RETURN IT IN START.
386 TEMP=-WORK(N2P3+NN)/(1.0D0+WORK(NP2+NN))
387 CALL DCOPY(NP1,WORK(N2P3),1,START,1)
388 CALL DAXPY(NP1,TEMP,WORK(NP2),1,START,1)
390 RETURN