Bump oldest cmake, compiler and CUDA versions required
[gromacs.git] / src / gromacs / linearalgebra / gmx_lapack / slasd3.cpp
blob394a2978aadc0b426f03e3b33f3ee7f20b8f40a4
1 #include <cmath>
2 #include "../gmx_blas.h"
3 #include "../gmx_lapack.h"
5 void
6 F77_FUNC(slasd3,SLASD3)(int *nl,
7 int *nr,
8 int *sqre,
9 int *k,
10 float *d__,
11 float *q,
12 int *ldq,
13 float *dsigma,
14 float *u,
15 int *ldu,
16 float *u2,
17 int *ldu2,
18 float *vt,
19 int *ldvt,
20 float *vt2,
21 int *ldvt2,
22 int *idxc,
23 int *ctot,
24 float *z__,
25 int *info)
27 int q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
28 vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
29 float d__2;
31 int i__, j, m, n, jc;
32 float rho;
33 int nlp1, nlp2, nrp1;
34 float temp;
35 int ctemp;
36 int ktemp;
37 int c__1 = 1;
38 int c__0 = 0;
39 float zero = 0.0;
40 float one = 1.0;
42 --d__;
43 q_dim1 = *ldq;
44 q_offset = 1 + q_dim1;
45 q -= q_offset;
46 --dsigma;
47 u_dim1 = *ldu;
48 u_offset = 1 + u_dim1;
49 u -= u_offset;
50 u2_dim1 = *ldu2;
51 u2_offset = 1 + u2_dim1;
52 u2 -= u2_offset;
53 vt_dim1 = *ldvt;
54 vt_offset = 1 + vt_dim1;
55 vt -= vt_offset;
56 vt2_dim1 = *ldvt2;
57 vt2_offset = 1 + vt2_dim1;
58 vt2 -= vt2_offset;
59 --idxc;
60 --ctot;
61 --z__;
63 /* Function Body */
64 *info = 0;
66 if (*nl < 1) {
67 *info = -1;
68 } else if (*nr < 1) {
69 *info = -2;
70 } else if (*sqre != 1 && *sqre != 0) {
71 *info = -3;
74 n = *nl + *nr + 1;
75 m = n + *sqre;
76 nlp1 = *nl + 1;
77 nlp2 = *nl + 2;
79 if (*k == 1) {
80 d__[1] = std::abs(z__[1]);
81 F77_FUNC(scopy,SCOPY)(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
82 if (z__[1] > 0.) {
83 F77_FUNC(scopy,SCOPY)(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
84 } else {
85 i__1 = n;
86 for (i__ = 1; i__ <= i__1; ++i__) {
87 u[i__ + u_dim1] = -u2[i__ + u2_dim1];
90 return;
93 F77_FUNC(scopy,SCOPY)(k, &z__[1], &c__1, &q[q_offset], &c__1);
95 rho = F77_FUNC(snrm2,SNRM2)(k, &z__[1], &c__1);
96 F77_FUNC(slascl,SLASCL)("G", &c__0, &c__0, &rho, &one, k, &c__1, &z__[1], k, info);
97 rho *= rho;
100 i__1 = *k;
101 for (j = 1; j <= i__1; ++j) {
102 F77_FUNC(slasd4,SLASD4)(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
103 &vt[j * vt_dim1 + 1], info);
105 if (*info != 0) {
106 return;
110 i__1 = *k;
111 for (i__ = 1; i__ <= i__1; ++i__) {
112 z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
113 i__2 = i__ - 1;
114 for (j = 1; j <= i__2; ++j) {
115 z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
116 i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
118 i__2 = *k - 1;
119 for (j = i__; j <= i__2; ++j) {
120 z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
121 i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
123 d__2 = std::sqrt(std::abs(z__[i__]));
124 z__[i__] = (q[i__ + q_dim1] > 0) ? d__2 : -d__2;
127 i__1 = *k;
128 for (i__ = 1; i__ <= i__1; ++i__) {
129 vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
130 vt_dim1 + 1];
131 u[i__ * u_dim1 + 1] = -1.;
132 i__2 = *k;
133 for (j = 2; j <= i__2; ++j) {
134 vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
135 * vt_dim1];
136 u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
138 temp = F77_FUNC(snrm2,SNRM2)(k, &u[i__ * u_dim1 + 1], &c__1);
139 q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
140 i__2 = *k;
141 for (j = 2; j <= i__2; ++j) {
142 jc = idxc[j];
143 q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
147 if (*k == 2) {
148 F77_FUNC(sgemm,SGEMM)("N", "N", &n, k, k, &one, &u2[u2_offset], ldu2, &q[q_offset],
149 ldq, &zero, &u[u_offset], ldu);
150 goto L100;
152 if (ctot[1] > 0) {
153 F77_FUNC(sgemm,SGEMM)("N", "N", nl, k, &ctot[1], &one, &u2[(u2_dim1 << 1) + 1],
154 ldu2, &q[q_dim1 + 2], ldq, &zero, &u[u_dim1 + 1], ldu);
155 if (ctot[3] > 0) {
156 ktemp = ctot[1] + 2 + ctot[2];
157 F77_FUNC(sgemm,SGEMM)("N", "N", nl, k, &ctot[3], &one, &u2[ktemp * u2_dim1 + 1]
158 , ldu2, &q[ktemp + q_dim1], ldq, &one, &u[u_dim1 + 1],
159 ldu);
161 } else if (ctot[3] > 0) {
162 ktemp = ctot[1] + 2 + ctot[2];
163 F77_FUNC(sgemm,SGEMM)("N", "N", nl, k, &ctot[3], &one, &u2[ktemp * u2_dim1 + 1],
164 ldu2, &q[ktemp + q_dim1], ldq, &zero, &u[u_dim1 + 1], ldu);
165 } else {
166 F77_FUNC(slacpy,SLACPY)("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
168 F77_FUNC(scopy,SCOPY)(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
169 ktemp = ctot[1] + 2;
170 ctemp = ctot[2] + ctot[3];
171 F77_FUNC(sgemm,SGEMM)("N", "N", nr, k, &ctemp, &one, &u2[nlp2 + ktemp * u2_dim1], ldu2,
172 &q[ktemp + q_dim1], ldq, &zero, &u[nlp2 + u_dim1], ldu);
174 L100:
175 i__1 = *k;
176 for (i__ = 1; i__ <= i__1; ++i__) {
177 temp = F77_FUNC(snrm2,SNRM2)(k, &vt[i__ * vt_dim1 + 1], &c__1);
178 q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
179 i__2 = *k;
180 for (j = 2; j <= i__2; ++j) {
181 jc = idxc[j];
182 q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
186 if (*k == 2) {
187 F77_FUNC(sgemm,SGEMM)("N", "N", k, &m, k, &one, &q[q_offset], ldq, &vt2[vt2_offset]
188 , ldvt2, &zero, &vt[vt_offset], ldvt);
189 return;
191 ktemp = ctot[1] + 1;
192 F77_FUNC(sgemm,SGEMM)("N", "N", k, &nlp1, &ktemp, &one, &q[q_dim1 + 1], ldq, &vt2[
193 vt2_dim1 + 1], ldvt2, &zero, &vt[vt_dim1 + 1], ldvt);
194 ktemp = ctot[1] + 2 + ctot[2];
195 if (ktemp <= *ldvt2) {
196 F77_FUNC(sgemm,SGEMM)("N", "N", k, &nlp1, &ctot[3], &one, &q[ktemp * q_dim1 + 1],
197 ldq, &vt2[ktemp + vt2_dim1], ldvt2, &one, &vt[vt_dim1 + 1],
198 ldvt);
201 ktemp = ctot[1] + 1;
202 nrp1 = *nr + *sqre;
203 if (ktemp > 1) {
204 i__1 = *k;
205 for (i__ = 1; i__ <= i__1; ++i__) {
206 q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
208 i__1 = m;
209 for (i__ = nlp2; i__ <= i__1; ++i__) {
210 vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
213 ctemp = ctot[2] + 1 + ctot[3];
214 F77_FUNC(sgemm,SGEMM)("N", "N", k, &nrp1, &ctemp, &one, &q[ktemp * q_dim1 + 1], ldq, &
215 vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &zero, &vt[nlp2 * vt_dim1 +
216 1], ldvt);
218 return;